# Spaces: protae5544 / 95win — Running
#
# File size: 19,618 Bytes
#
# fd34976

import gradio as gr
import pandas as pd
import io
import zipfile
from datetime import datetime
import traceback
import tempfile
import os

# Import the PDF libraries. If any of them is missing, install the
# dependencies with pip at startup and retry the imports.
try:
    from PyPDF2 import PdfReader, PdfWriter
    from reportlab.pdfgen import canvas
    from reportlab.lib.pagesizes import letter
    from reportlab.pdfbase import pdfmetrics
    from reportlab.pdfbase.ttfonts import TTFont
except ImportError as e:
    print(f"กำลังติดตั้ง dependencies: {e}")
    import subprocess
    import sys
    subprocess.check_call([sys.executable, "-m", "pip", "install", "PyPDF2", "reportlab", "pandas"])
    # Retry every import from the try-branch so both paths bind the same
    # names (pdfmetrics and TTFont were previously left undefined here).
    from PyPDF2 import PdfReader, PdfWriter
    from reportlab.pdfgen import canvas
    from reportlab.lib.pagesizes import letter
    from reportlab.pdfbase import pdfmetrics
    from reportlab.pdfbase.ttfonts import TTFont

def analyze_pdf_fields(pdf_path):
    """Collect the named form fields of a PDF.

    Two sources are scanned: the document-level ``/AcroForm`` ``/Fields``
    array, then the ``/Widget`` annotations of every page. A widget with the
    same name as an AcroForm entry replaces that entry.

    Args:
        pdf_path: Anything ``PdfReader`` accepts as its source.

    Returns:
        dict: ``{field_name: {'type': ..., 'method': ..., ['page': ...]}}``.
        If reading fails, ``{"error": <message>}`` is returned instead of
        raising.
    """
    try:
        pdf = PdfReader(pdf_path)
        found = {}

        # Pass 1: fields listed in the AcroForm dictionary
        if pdf.trailer.get("/Root") and pdf.trailer["/Root"].get("/AcroForm"):
            form = pdf.trailer["/Root"]["/AcroForm"]
            if "/Fields" in form:
                for field_ref in form["/Fields"]:
                    entry = field_ref.get_object()
                    if "/T" not in entry:
                        continue
                    name = str(entry["/T"]).strip("()")
                    kind = str(entry.get("/FT", "Unknown"))
                    found[name] = {'type': kind, 'method': 'AcroForm'}

        # Pass 2: widget annotations attached to each page
        for page_index, page in enumerate(pdf.pages):
            if "/Annots" not in page:
                continue
            try:
                for annot_ref in page["/Annots"]:
                    widget = annot_ref.get_object()
                    is_widget = widget.get("/Subtype") == "/Widget"
                    if is_widget and "/T" in widget:
                        name = str(widget["/T"]).strip("()")
                        kind = str(widget.get("/FT", "Widget"))
                        found[name] = {
                            'type': kind,
                            'page': page_index + 1,  # 1-based page number
                            'method': 'Annotation',
                        }
            except Exception:
                # A page whose annotations cannot be read is skipped
                continue

        return found
    except Exception as exc:
        return {"error": str(exc)}

def fill_pdf_form(pdf_path, field_data):
    """Copy a PDF into a new writer and fill its form fields.

    Args:
        pdf_path: Anything ``PdfReader`` accepts as its source.
        field_data: Mapping of field name -> value to write.

    Returns:
        PdfWriter: Writer holding every page of the source document. Filling
        is best effort: a page or field that cannot be updated is skipped.

    Raises:
        Exception: If the document cannot be read or copied. The original
            error is attached as ``__cause__``.
    """
    try:
        reader = PdfReader(pdf_path)
        writer = PdfWriter()

        # Copy every page
        for page in reader.pages:
            writer.add_page(page)

        if hasattr(writer, 'update_page_form_field_values'):
            for page in writer.pages:
                try:
                    writer.update_page_form_field_values(page, field_data)
                except Exception:
                    # Deliberate best effort: keep going with the other pages
                    # (presumably some pages have nothing to fill).
                    continue
        else:
            # Fallback for writers without the helper: set /V on the fields
            # listed in the AcroForm. Index the trailer (instead of .get) so
            # the membership test below runs on the catalog dictionary itself.
            root = reader.trailer["/Root"] if "/Root" in reader.trailer else {}
            if "/AcroForm" in root:
                try:
                    acro_form = root["/AcroForm"]
                    if "/Fields" in acro_form:
                        for field in acro_form["/Fields"]:
                            field_obj = field.get_object()
                            if "/T" not in field_obj:
                                continue
                            field_name = str(field_obj["/T"]).strip("()")
                            if field_name in field_data:
                                try:
                                    # NOTE(review): plain str key/value is kept
                                    # from the original; confirm the PDF library
                                    # can serialise it.
                                    field_obj.update({"/V": field_data[field_name]})
                                except Exception:
                                    pass
                except Exception:
                    pass

        return writer
    except Exception as e:
        # Chain the cause so the traceback shows what actually failed
        raise Exception(f"ไม่สามารถเติมฟอร์มได้: {str(e)}") from e

def create_simple_pdf(data_row, filename):
    """Render one data row as a plain "column: value" PDF.

    The page is letter-sized with a bold title line, one text line per
    non-empty value, and a creation timestamp at the bottom of the last page.
    Values longer than 80 characters are cut to 77 characters plus "...".

    Args:
        data_row: Object whose ``.items()`` yields (column, value) pairs.
        filename: Output file name; shown in the title with ".pdf" removed.

    Returns:
        bytes: The finished PDF document.
    """
    buffer = io.BytesIO()
    p = canvas.Canvas(buffer, pagesize=letter)
    _, height = letter

    # Title
    p.setFont("Helvetica-Bold", 16)
    title = f"Document: {filename.replace('.pdf', '')}"
    p.drawString(50, height - 50, title)
    p.line(50, height - 60, 550, height - 60)

    # Body
    y_position = height - 100
    p.setFont("Helvetica", 12)

    for column, value in data_row.items():
        if not (pd.notna(value) and str(value).strip()):
            continue

        clean_column = str(column).strip()
        clean_value = str(value).strip()
        if len(clean_value) > 80:
            clean_value = clean_value[:77] + "..."
        text = f"{clean_column}: {clean_value}"

        # Break the page right before a line is placed, so a row that ends
        # exactly at the bottom margin does not leave a trailing blank page.
        if y_position < 50:
            p.showPage()
            p.setFont("Helvetica", 12)
            y_position = height - 50

        try:
            p.drawString(50, y_position, text)
        except Exception:
            # Fallback if the canvas rejects the text: retry with the
            # non-ASCII characters dropped.
            safe_text = text.encode('ascii', errors='ignore').decode('ascii')
            p.drawString(50, y_position, safe_text)

        y_position -= 25

    # Creation timestamp
    p.setFont("Helvetica", 8)
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    p.drawString(50, 30, f"Created: {timestamp}")

    p.save()
    return buffer.getvalue()

def _build_field_data(row_data):
    """Map each column name, plus common spellings of it, to the cell value."""
    exact_names = {}
    field_data = {}
    for column, value in row_data.items():
        if not (pd.notna(value) and str(value).strip()):
            continue
        clean_value = str(value).strip()
        clean_column = str(column).strip()
        exact_names[clean_column] = clean_value

        # Try several spellings of the field name
        field_variations = (
            clean_column,
            clean_column.lower(),
            clean_column.upper(),
            clean_column.replace('_', ' '),
            clean_column.replace(' ', '_'),
            clean_column.replace('-', '_'),
            clean_column.replace('_', ''),
        )
        for variation in field_variations:
            field_data[variation] = clean_value

    # Exact column names are applied last so a derived spelling of one column
    # (e.g. "Name".lower()) can never replace the value of another column that
    # is literally called "name".
    field_data.update(exact_names)
    return field_data


def process_single_row(pdf_path, row_data, filename, use_form=True):
    """Produce the PDF for a single data row.

    Args:
        pdf_path: Template PDF passed on to ``fill_pdf_form``.
        row_data: Object whose ``.items()`` yields (column, value) pairs.
        filename: Output file name, used as the title of a generated PDF.
        use_form: When true, try to fill the template's form first and fall
            back to a freshly generated PDF if that raises.

    Returns:
        tuple: ``(pdf_bytes, status)`` where status is ``"form_filled"``,
        ``"new_pdf_created"`` or ``"new_pdf_created: <reason>"``; on failure
        ``(None, "error: <message>")``.
    """
    try:
        if not use_form:
            # Generate a new PDF
            return create_simple_pdf(row_data, filename), "new_pdf_created"

        try:
            # Try to fill the form
            writer = fill_pdf_form(pdf_path, _build_field_data(row_data))
            output_buffer = io.BytesIO()
            writer.write(output_buffer)
            return output_buffer.getvalue(), "form_filled"
        except Exception as e:
            # Filling failed: generate a new PDF instead
            pdf_content = create_simple_pdf(row_data, filename)
            return pdf_content, f"new_pdf_created: {str(e)}"

    except Exception as e:
        return None, f"error: {str(e)}"

def read_csv_safe(csv_file):
    """Read a CSV file, guessing its encoding and separator.

    Each encoding/separator pair is tried in order and the first parse with
    more than one column and at least one row is returned. If none qualifies,
    a plain ``pd.read_csv`` is tried; if that fails too, the first parse that
    at least decoded cleanly is returned (e.g. a single-column file in a
    non-UTF-8 encoding).

    Args:
        csv_file: Path (or other source accepted by ``pd.read_csv``).

    Returns:
        tuple: ``(DataFrame, None)`` on success, ``(None, error_message)``
        when nothing could be parsed.
    """
    encodings = ['utf-8', 'utf-8-sig', 'cp874', 'tis-620', 'iso-8859-1', 'cp1252']
    separators = [',', ';', '\t', '|']

    fallback = None  # first frame that parsed but failed the shape heuristic

    for encoding in encodings:
        for sep in separators:
            try:
                df = pd.read_csv(csv_file, encoding=encoding, sep=sep, engine='python')
            except Exception:
                continue
            if len(df.columns) > 1 and len(df) > 0:
                return df, None
            if fallback is None:
                fallback = df

    try:
        return pd.read_csv(csv_file), None
    except Exception as e:
        # The default read assumes UTF-8; prefer a frame that decoded with one
        # of the listed encodings over reporting a failure.
        if fallback is not None:
            return fallback, None
        return None, str(e)

def process_pdf_csv(pdf_file, csv_file, filename_column, file_prefix, use_form_fields, progress=gr.Progress()):
    """Generate one PDF per CSV row and bundle them into a ZIP archive.

    Args:
        pdf_file: Template PDF (``None`` when nothing was uploaded).
        csv_file: CSV data file (``None`` when nothing was uploaded).
        filename_column: Name of the column whose value names each output
            file; rows fall back to a running number when it is unusable.
        file_prefix: Text placed in front of every output file name.
        use_form_fields: Fill the template's form fields when any were found.
        progress: Gradio progress callback.

    Returns:
        tuple: ``(zip_path, message)``; ``zip_path`` is ``None`` when no PDF
        could be produced.
    """

    if pdf_file is None or csv_file is None:
        return None, "❌ กรุณาอัพโหลดไฟล์ PDF และ CSV"

    try:
        # Read the CSV
        df, csv_error = read_csv_safe(csv_file)
        if df is None:
            return None, f"❌ ไม่สามารถอ่าน CSV ได้: {csv_error}"

        # Inspect the PDF
        pdf_fields = analyze_pdf_fields(pdf_file)
        has_form_fields = bool(pdf_fields) and "error" not in pdf_fields

        # The prefix ends up in ZIP entry names, so keep path separators out
        # of it to avoid entries that point outside the extraction folder.
        safe_prefix = str(file_prefix).replace('/', '_').replace('\\', '_')

        generated_pdfs = {}
        success_count = 0
        error_count = 0
        processing_log = []

        # Process each row
        for index, (_, row) in enumerate(df.iterrows()):
            progress((index + 1) / len(df), f"ประมวลผล {index + 1}/{len(df)}")

            try:
                # Build the file name
                name_part = ""
                if filename_column and filename_column in df.columns and pd.notna(row[filename_column]):
                    raw_name = str(row[filename_column]).strip()
                    name_part = "".join(
                        c for c in raw_name if c.isalnum() or c in (' ', '-', '_')
                    ).strip()
                if not name_part:
                    # No usable name for this row: use its running number
                    name_part = f"{index + 1:03d}"

                filename = f"{safe_prefix}_{name_part}.pdf"
                filename = filename.replace('  ', ' ').replace(' ', '_')

                # Two rows with the same name must not overwrite each other
                if filename in generated_pdfs:
                    stem = filename[:-len('.pdf')]
                    suffix = 2
                    while f"{stem}_{suffix}.pdf" in generated_pdfs:
                        suffix += 1
                    filename = f"{stem}_{suffix}.pdf"

                pdf_content, status = process_single_row(
                    pdf_file,
                    row,
                    filename,
                    use_form_fields and has_form_fields
                )

                if pdf_content is not None:
                    generated_pdfs[filename] = pdf_content
                    success_count += 1
                    processing_log.append(f"✅ {filename}: {status}")
                else:
                    error_count += 1
                    processing_log.append(f"❌ {filename}: {status}")

            except Exception as e:
                error_count += 1
                processing_log.append(f"💥 แถว {index + 1}: {str(e)}")

        if not generated_pdfs:
            return None, "❌ ไม่สามารถสร้าง PDF ได้เลย"

        # Write the ZIP into the temp directory under a timestamped name
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        zip_filename = f"generated_pdfs_{timestamp}.zip"
        temp_zip_path = os.path.join(tempfile.gettempdir(), zip_filename)
        with zipfile.ZipFile(temp_zip_path, 'w', zipfile.ZIP_DEFLATED) as zip_file:
            for filename, pdf_content in generated_pdfs.items():
                zip_file.writestr(filename, pdf_content)

        result_message = f"✅ สร้าง PDF สำเร็จ {success_count} ไฟล์!"
        if error_count > 0:
            result_message += f"\n⚠️ มีข้อผิดพลาด {error_count} ไฟล์"

        # Only the first 10 log lines are shown
        result_message += f"\n\n📋 รายละเอียด:\n" + "\n".join(processing_log[:10])
        if len(processing_log) > 10:
            result_message += f"\n... และอีก {len(processing_log) - 10} รายการ"

        return temp_zip_path, result_message

    except Exception as e:
        return None, f"❌ เกิดข้อผิดพลาด: {str(e)}\n{traceback.format_exc()}"

def analyze_pdf_info(pdf_file):
    """Build a Markdown summary of an uploaded PDF.

    The summary gives the page count and up to ten form fields, or a note
    that a new PDF will be generated when no fields could be read.

    Args:
        pdf_file: PDF source, or ``None`` when nothing is uploaded.

    Returns:
        str: Markdown text; an error message if the PDF cannot be analysed.
    """
    if pdf_file is None:
        return "ไม่มีไฟล์ PDF"

    try:
        page_count = len(PdfReader(pdf_file).pages)
        parts = [
            "📄 **ข้อมูล PDF:**\n",
            f"- จำนวนหน้า: {page_count}\n",
        ]

        # Look for form fields
        fields = analyze_pdf_fields(pdf_file)
        fields_usable = bool(fields) and "error" not in fields

        if not fields_usable:
            parts.append("- Form Fields: ไม่พบหรือไม่สามารถอ่านได้\n")
            parts.append("- หมายเหตุ: จะสร้าง PDF ใหม่แทน\n")
            return "".join(parts)

        total = len(fields)
        parts.append(f"- จำนวน Form Fields: {total}\n")
        parts.append("\n🏷️ **รายชื่อ Fields:**\n")
        # Only the first 10 fields are listed
        listed = list(fields.items())[:10]
        parts.extend(
            f"  - {field_name} ({meta.get('type', 'Unknown')})\n"
            for field_name, meta in listed
        )
        if total > 10:
            parts.append(f"  - ... และอีก {total - 10} fields\n")

        return "".join(parts)
    except Exception as exc:
        return f"❌ ไม่สามารถวิเคราะห์ PDF ได้: {str(exc)}"

def analyze_csv_info(csv_file):
    """Build a Markdown summary of an uploaded CSV.

    The summary gives the row and column counts, up to 15 column names and
    up to five columns that contain missing values.

    Args:
        csv_file: CSV source, or ``None`` when nothing is uploaded.

    Returns:
        str: Markdown text; an error message if the CSV cannot be analysed.
    """
    if csv_file is None:
        return "ไม่มีไฟล์ CSV"

    try:
        frame, read_error = read_csv_safe(csv_file)
        if frame is None:
            return f"❌ ไม่สามารถอ่าน CSV ได้: {read_error}"

        column_total = len(frame.columns)
        parts = [
            "📋 **ข้อมูล CSV:**\n",
            f"- จำนวนแถว: {len(frame)}\n",
            f"- จำนวนคอลัมน์: {column_total}\n",
            "\n📝 **รายชื่อคอลัมน์:**\n",
        ]

        # Only the first 15 columns are listed
        parts.extend(f"  - {name}\n" for name in frame.columns[:15])
        if column_total > 15:
            parts.append(f"  - ... และอีก {column_total - 15} คอลัมน์\n")

        # Report columns that have missing values
        null_counts = frame.isnull().sum()
        with_gaps = null_counts[null_counts > 0]
        if not with_gaps.empty:
            parts.append("\n⚠️ **ข้อมูลที่ขาดหาย:**\n")
            parts.extend(
                f"  - {name}: {gap_count} แถว\n"
                for name, gap_count in with_gaps.head(5).items()
            )

        return "".join(parts)
    except Exception as exc:
        return f"❌ ไม่สามารถวิเคราะห์ CSV ได้: {str(exc)}"

# Build the Gradio interface
def create_interface():
    """Assemble the Gradio Blocks UI and wire up its event handlers.

    The left column holds the two file uploads, the naming options and the
    run button; the right column shows the file summaries, the resulting ZIP
    and the status message.

    Returns:
        The ``gr.Blocks`` application (not yet launched).
    """
    with gr.Blocks(title="PDF Form Filler", theme=gr.themes.Soft()) as demo:
        gr.Markdown("""
        # 📄 เครื่องมือเติมข้อมูล PDF จาก CSV
        
        **เครื่องมือนี้สามารถ:**
        - เติมข้อมูลลงในฟอร์ม PDF ที่มี form fields
        - สร้าง PDF ใหม่หากไม่มี form fields หรือเติมไม่ได้
        - รองรับ CSV หลาย encoding (UTF-8, TIS-620, CP874, etc.)
        - ส่งออกเป็นไฟล์ ZIP
        """)

        with gr.Row():
            # Left column: inputs and settings
            with gr.Column(scale=1):
                gr.Markdown("## 📁 อัพโหลดไฟล์")
                pdf_upload = gr.File(label="PDF Template", file_types=[".pdf"], type="filepath")
                csv_upload = gr.File(label="CSV Data", file_types=[".csv"], type="filepath")

                gr.Markdown("## ⚙️ ตั้งค่า")
                name_column_box = gr.Textbox(
                    label="คอลัมน์สำหรับชื่อไฟล์ (ถ้ามี)",
                    placeholder="เช่น name, id, etc.",
                    value="",
                )
                prefix_box = gr.Textbox(label="คำนำหน้าชื่อไฟล์", value="document")
                form_toggle = gr.Checkbox(label="ใช้ Form Fields (ถ้าพบ)", value=True)
                run_button = gr.Button("🚀 สร้าง PDF ทั้งหมด", variant="primary", size="lg")

            # Right column: file summaries and results
            with gr.Column(scale=2):
                gr.Markdown("## 📊 ข้อมูลไฟล์")
                pdf_summary = gr.Markdown("ยังไม่มีไฟล์ PDF")
                csv_summary = gr.Markdown("ยังไม่มีไฟล์ CSV")

                gr.Markdown("## 📥 ผลลัพธ์")
                # Hidden until a ZIP has been produced
                zip_output = gr.File(label="ไฟล์ ZIP ที่สร้าง", visible=False)
                status_box = gr.Markdown("")

        # Refresh the summaries whenever an upload changes
        pdf_upload.change(fn=analyze_pdf_info, inputs=[pdf_upload], outputs=[pdf_summary])
        csv_upload.change(fn=analyze_csv_info, inputs=[csv_upload], outputs=[csv_summary])

        # Run the batch, then reveal the download only if a ZIP was returned
        run_event = run_button.click(
            fn=process_pdf_csv,
            inputs=[pdf_upload, csv_upload, name_column_box, prefix_box, form_toggle],
            outputs=[zip_output, status_box],
        )
        run_event.then(
            fn=lambda zip_path: gr.update(visible=zip_path is not None),
            inputs=[zip_output],
            outputs=[zip_output],
        )

    return demo

# Run the app when executed as a script
if __name__ == "__main__":
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": True,  # create a public URL
        "debug": True,
    }
    app = create_interface()
    app.launch(**launch_options)