File size: 1,644 Bytes
ae03257
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import os
import logging
from io import BytesIO
from PIL import Image
import gradio as gr
from google import genai
from google.genai import types

# Logging: records at INFO and above are written to app.log, each line
# carrying a timestamp and the level name.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
    filename='app.log',
)

# Gemini client: the API key is read from the GEMINI_API_KEY environment
# variable (None if it is unset) and handed to the SDK client.
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
client = genai.Client(api_key=GEMINI_API_KEY)

# Image-to-text: describe an uploaded picture with Gemini.
def explain_image(image: Image.Image):
    """Ask Gemini for a Traditional Chinese description of an image.

    Args:
        image: The picture to describe, as a PIL image. May be ``None``
            (e.g. the button was clicked with nothing uploaded).

    Returns:
        The text of the first part of the first candidate in the model's
        reply, or a short message asking for an upload when ``image`` is
        ``None``.
    """
    # Without this guard, a click with no image crashes on image.save().
    if image is None:
        logging.warning("未提供圖片,略過 Gemini 呼叫。")
        return "請先上傳圖片。"

    # Serialise the PIL image to PNG bytes so it can travel as inline data.
    buffered = BytesIO()
    image.save(buffered, format="PNG")
    image_data = buffered.getvalue()

    # The SDK's part type is `types.Part`; `types.ContentPart` does not exist
    # and raised AttributeError on every call.
    contents = [
        types.Part.from_bytes(data=image_data, mime_type="image/png"),
        types.Part.from_text(text="請用繁體中文說明這張圖片的內容。"),
    ]

    # Request a text-only answer from the model.
    response = client.models.generate_content(
        model="gemini-1.5-flash",
        contents=contents,
        config=types.GenerateContentConfig(response_modalities=["TEXT"]),
    )

    # Use the first text part of the first candidate as the explanation.
    explanation = response.candidates[0].content.parts[0].text
    logging.info("圖片說明成功取得。")
    return explanation

# Gradio UI: a heading, an image upload, a trigger button and a text box
# for the reply. Components are created in display order.
with gr.Blocks() as demo:
    gr.Markdown(value="## 🧠 Gemini 圖片解釋器(圖 ➜ 文)")
    image_input = gr.Image(label="上傳圖片", type="pil")
    explain_button = gr.Button(value="解釋圖片")
    output_text = gr.Textbox(lines=5, label="圖片說明")

    # On click, pass the uploaded image to explain_image and show the
    # returned string in the text box.
    explain_button.click(explain_image, image_input, output_text)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()