# Spaces: aliceblue11 / image_comparison
#
# Build error
#
# File size: 11,917 Bytes
#
# 1b807db

import difflib
import html
import os
import re
from typing import List, Tuple, Optional

import google.generativeai as genai
import gradio as gr
import PIL.Image
from dotenv import load_dotenv

# Load environment variables (python-dotenv; e.g. a local `.env` file)
load_dotenv()

# Google AI API configuration.
# `model` is always bound (None when no key is configured) so that any code
# referencing it cannot fail with a NameError.
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
model = None
if GOOGLE_API_KEY:
    genai.configure(api_key=GOOGLE_API_KEY)
    model = genai.GenerativeModel('gemini-1.5-flash')

class TextDiffChecker:
    """Extract text from images with Gemini and diff two texts word by word.

    ``self.model`` is the module-level Gemini model when an API key is
    configured, otherwise ``None``.
    """

    # Inline styles for removed words (original side) and added words
    # (comparison side) in the HTML view.
    _REMOVED_STYLE = "background: #ffcccc; text-decoration: line-through;"
    _ADDED_STYLE = "background: #ccffcc; font-weight: bold;"
    _BOX_STYLE = "background: #f0f0f0; padding: 10px; border-radius: 5px; margin: 5px 0;"

    def __init__(self):
        # Guarded by the key check so `model` is never read when no key is
        # configured.
        self.model = model if GOOGLE_API_KEY else None

    # The annotation is quoted so it is not evaluated at definition time.
    def extract_text_from_image(self, image: "PIL.Image.Image") -> str:
        """Return the text Gemini reads from ``image``.

        When no model is configured, or the API call raises, a message
        string describing the problem is returned instead of the text.
        """
        if not self.model:
            return "API 키가 설정되지 않았습니다."

        try:
            # Ask Gemini Vision to return only the text found in the image.
            prompt = """
            이 이미지에서 모든 텍스트를 정확히 추출해주세요.
            - 배경은 무시하고 텍스트만 인식
            - 글자 크기, 색상, 폰트는 무시
            - 줄바꿈과 공백을 유지
            - 특수문자와 숫자도 모두 포함
            - 추출된 텍스트만 반환 (다른 설명 없이)
            """

            response = self.model.generate_content([prompt, image])
            return response.text.strip()

        except Exception as e:
            return f"텍스트 추출 오류: {str(e)}"

    def normalize_text(self, text: str) -> str:
        """Collapse every whitespace run to one space and trim both ends."""
        return re.sub(r'\s+', ' ', text).strip()

    def find_differences(self, text1: str, text2: str) -> Tuple[str, List[str]]:
        """Compare two texts word by word.

        Returns:
            ``(html_diff, differences)`` where ``html_diff`` is the markup
            from the HTML renderer and ``differences`` holds up to two
            summary lines (removed words, then added words). It is empty
            when the word sequences are identical.
        """
        words1 = self.normalize_text(text1).split()
        words2 = self.normalize_text(text2).split()

        # Read the opcodes directly instead of parsing unified_diff output:
        # prefix-based parsing mistakes words such as "--flag" or "++x" for
        # the "---"/"+++" file header lines and silently drops them.
        opcodes = difflib.SequenceMatcher(None, words1, words2).get_opcodes()

        removed_words: List[str] = []
        added_words: List[str] = []
        for tag, i1, i2, j1, j2 in opcodes:
            if tag in ('delete', 'replace'):
                removed_words.extend(words1[i1:i2])
            if tag in ('insert', 'replace'):
                added_words.extend(words2[j1:j2])

        differences = []
        if removed_words:
            differences.append(f"❌ 삭제된 단어: {', '.join(removed_words)}")
        if added_words:
            differences.append(f"✅ 추가된 단어: {', '.join(added_words)}")

        html_diff = self._render_html_diff(words1, words2, opcodes)
        return html_diff, differences

    def create_html_diff(self, words1: List[str], words2: List[str]) -> str:
        """Render both word lists as HTML with changed words highlighted."""
        opcodes = difflib.SequenceMatcher(None, words1, words2).get_opcodes()
        return self._render_html_diff(words1, words2, opcodes)

    def _render_html_diff(self, words1: List[str], words2: List[str], opcodes) -> str:
        """Assemble the two-panel HTML view from precomputed opcodes."""
        original_html = self._render_side(
            words1,
            [(tag != 'equal', i1, i2) for tag, i1, i2, _, _ in opcodes],
            self._REMOVED_STYLE,
        )
        comparison_html = self._render_side(
            words2,
            [(tag != 'equal', j1, j2) for tag, _, _, j1, j2 in opcodes],
            self._ADDED_STYLE,
        )

        html_parts = [
            "<div style='font-family: monospace; line-height: 1.6;'>",
            "<h3>🔍 텍스트 비교 결과</h3>",
            # Original text panel
            "<div style='margin: 10px 0;'>",
            "<strong>📄 원본:</strong><br>",
            f"<div style='{self._BOX_STYLE}'>",
            original_html,
            "</div></div>",
            # Comparison text panel
            "<div style='margin: 10px 0;'>",
            "<strong>📝 비교본:</strong><br>",
            f"<div style='{self._BOX_STYLE}'>",
            comparison_html,
            "</div></div>",
            "</div>",
        ]
        return ''.join(html_parts)

    @staticmethod
    def _render_side(words: List[str], spans, changed_style: str) -> str:
        """Render one panel from ``(changed, start, end)`` word ranges.

        Words are HTML-escaped because they come from OCR output or user
        input. Segments are joined with single spaces so an unchanged run is
        never glued to a neighbouring highlighted span.
        """
        segments = []
        for changed, start, end in spans:
            if start == end:
                # This opcode contributes nothing on this side
                # (e.g. an insert seen from the original side).
                continue
            text = html.escape(' '.join(words[start:end]))
            if changed:
                text = f"<span style='{changed_style}'>{text}</span>"
            segments.append(text)
        return ' '.join(segments)

# Module-level TextDiffChecker instance used by process_comparison
checker = TextDiffChecker()

def process_comparison(image1, image2, text_input, comparison_mode):
    """Run one comparison and return values for the three output widgets.

    Args:
        image1: First (original) image; its text is extracted.
        image2: Second image, used when ``comparison_mode`` is
            "이미지 vs 이미지".
        text_input: Text to compare against, used in any other mode.
            ``None`` is treated like an empty string.
        comparison_mode: The selected comparison mode string.

    Returns:
        ``(extracted_markdown, html_diff, differences)``. On a validation or
        processing error the first item is the error message and the other
        two are ``""`` and ``[]``.
    """
    if not GOOGLE_API_KEY:
        return "❌ Google API 키를 설정해주세요.", "", []

    try:
        # Validate every input before extracting anything, so a missing
        # second source does not cost an API call for the first image.
        if image1 is None:
            return "❌ 첫 번째 이미지를 업로드해주세요.", "", []

        image_mode = comparison_mode == "이미지 vs 이미지"
        typed_text = (text_input or "").strip()
        if image_mode and image2 is None:
            return "❌ 두 번째 이미지를 업로드해주세요.", "", []
        if not image_mode and not typed_text:
            return "❌ 비교할 텍스트를 입력해주세요.", "", []

        # First source: always the first image.
        text1 = checker.extract_text_from_image(image1)
        source1_info = "📷 이미지 1에서 추출된 텍스트"

        # Second source: the second image or the typed text.
        if image_mode:
            text2 = checker.extract_text_from_image(image2)
            source2_info = "📷 이미지 2에서 추출된 텍스트"
        else:
            text2 = typed_text
            source2_info = "📝 입력된 텍스트"

        # Build the Markdown without leading indentation: indented Markdown
        # source can render as an indented code block, which would swallow
        # the heading and labels once multi-line text is interpolated.
        extracted_info = "\n".join([
            "### 📋 추출된 텍스트",
            "",
            f"**{source1_info}:**",
            "```",
            text1,
            "```",
            "",
            f"**{source2_info}:**",
            "```",
            text2,
            "```",
        ])

        html_diff, differences = checker.find_differences(text1, text2)

        if not differences:
            differences = ["✅ 두 텍스트가 동일합니다!"]

        return extracted_info, html_diff, differences

    except Exception as e:
        # UI boundary: report the failure in the first output instead of
        # letting the exception propagate.
        return f"❌ 처리 중 오류 발생: {str(e)}", "", []

def create_interface():
    """Build the Gradio Blocks UI for the comparison tool and return it."""

    image_mode_label = "이미지 vs 이미지"
    text_mode_label = "이미지 vs 텍스트"

    # Page-level CSS handed to gr.Blocks.
    page_css = """
        .gradio-container {
            max-width: 1200px;
            margin: auto;
        }
        .diff-output {
            border: 1px solid #ddd;
            border-radius: 8px;
            padding: 15px;
            background: white;
        }
        """

    def toggle_input_mode(mode):
        """Return visibility updates for (second-image group, text group)."""
        wants_text = mode == text_mode_label
        return gr.update(visible=not wants_text), gr.update(visible=wants_text)

    with gr.Blocks(
        css=page_css,
        theme=gr.themes.Soft(),
        title="📝 텍스트 비교 검수 시스템",
    ) as demo:

        # Page heading and feature overview
        gr.Markdown("""
        # 📝 텍스트 비교 검수 시스템
        
        이미지의 텍스트를 인식하고 비교하여 차이점을 찾아드립니다.
        
        ⚙️ **기능:**
        - 🖼️ 이미지에서 텍스트 자동 추출 (Google Gemini Vision API)
        - 🔍 두 텍스트 간 차이점 정확한 분석
        - 📊 시각적 차이점 표시 (추가/삭제/변경)
        - 🎯 배경 무시, 텍스트만 집중 분석
        """)

        with gr.Row():
            # Left column: inputs
            with gr.Column(scale=1):
                gr.Markdown("### 📤 입력")

                mode_radio = gr.Radio(
                    label="비교 모드",
                    choices=[image_mode_label, text_mode_label],
                    value=image_mode_label,
                )

                original_image = gr.Image(type="pil", label="📷 첫 번째 이미지 (원본)")

                with gr.Group() as second_image_box:
                    candidate_image = gr.Image(type="pil", label="📷 두 번째 이미지 (비교본)")

                # Hidden until the image-vs-text mode is selected
                with gr.Group(visible=False) as text_box:
                    candidate_text = gr.Textbox(
                        lines=5,
                        label="📝 비교할 텍스트",
                        placeholder="비교하고 싶은 텍스트를 직접 입력하세요...",
                    )

                # Swap the second input whenever the mode changes
                mode_radio.change(
                    toggle_input_mode,
                    inputs=[mode_radio],
                    outputs=[second_image_box, text_box],
                )

                run_button = gr.Button(
                    "🔍 텍스트 비교 분석 시작",
                    size="lg",
                    variant="primary",
                )

            # Right column: results, one tab per output
            with gr.Column(scale=2):
                gr.Markdown("### 📊 분석 결과")

                with gr.Tabs():
                    with gr.TabItem("📋 추출된 텍스트"):
                        extracted_md = gr.Markdown()

                    with gr.TabItem("🔍 시각적 비교"):
                        diff_html = gr.HTML(elem_classes=["diff-output"])

                    with gr.TabItem("📝 차이점 요약"):
                        summary_json = gr.JSON(label="발견된 차이점")

        # Setup instructions, shown only when no API key is configured
        if not GOOGLE_API_KEY:
            gr.Markdown("""
            ⚠️ **설정 필요:** 
            1. Google AI Studio에서 API 키를 발급받으세요
            2. `.env` 파일에 `GOOGLE_API_KEY=your_api_key` 추가
            3. 애플리케이션을 재시작하세요
            """)

        # Wire the analyze button to the comparison handler
        run_button.click(
            process_comparison,
            inputs=[original_image, candidate_image, candidate_text, mode_radio],
            outputs=[extracted_md, diff_html, summary_json],
        )

        # Usage tips
        gr.Markdown("""
        ### 💡 사용 팁
        - 📸 **고품질 이미지 사용**: 텍스트가 선명한 이미지일수록 정확도가 높아집니다
        - 🔤 **다양한 언어 지원**: 한글, 영어, 숫자, 특수문자 모두 인식 가능
        - 🎨 **배경 무시**: 복잡한 배경이 있어도 텍스트만 정확히 추출합니다
        - ⚡ **실시간 비교**: 업로드와 동시에 즉시 분석 결과를 확인할 수 있습니다
        """)

    return demo

if __name__ == "__main__":
    # Build the UI, then serve it on all interfaces at port 7860 with
    # Gradio's share option enabled.
    app = create_interface()
    app.launch(share=True, server_port=7860, server_name="0.0.0.0")