File size: 1,379 Bytes
2bd9468
 
9902a40
87c119f
 
 
2bd9468
9902a40
891d8e1
2bd9468
87c119f
 
 
 
 
 
 
9902a40
 
87c119f
 
 
 
 
 
 
 
 
891d8e1
2bd9468
87c119f
2bd9468
13ed850
891d8e1
87c119f
 
2bd9468
3382a71
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import io
import logging

import gradio as gr
from PIL import Image
from pydub import AudioSegment
from pydub.playback import play
from transformers import pipeline

# Load the image-classification pipeline once, at import time, so that every
# request handled by this module reuses the same model instance.
model = pipeline(
    task="image-classification",
    model="google/vit-base-patch16-224",
)

# Sound clip to play for each recognised category: label -> audio file path.
# Extend this mapping with one entry per additional category.
sound_files = {"dog": "dog_bark.mp3", "cat": "cat_meow.mp3"}

_logger = logging.getLogger(__name__)


def _find_sound_path(label):
    """Return the sound file registered for ``label``, or ``None`` if none fits."""
    exact = sound_files.get(label)
    if exact is not None:
        return exact

    # The ImageNet-1k labels of the ViT checkpoint are comma-separated synonym
    # lists such as "tabby, tabby cat", so an exact lookup of a short key like
    # "cat" never succeeds. Fall back to a case-insensitive match against the
    # individual synonyms and words of the label. Whole words are compared
    # (not substrings) so that "cat" does not match "catamaran"; a label such
    # as "hot dog" will still match the key "dog".
    lowered = label.lower()
    synonyms = {part.strip() for part in lowered.split(",")}
    words = set(lowered.replace(",", " ").split())
    for keyword, path in sound_files.items():
        key = keyword.lower()
        if key in synonyms or key in words:
            return path
    return None


def _play_sound(sound_path):
    """Play the clip at ``sound_path``; log the failure instead of raising."""
    # NOTE(review): pydub's play() outputs audio on the machine running this
    # process and blocks until the clip ends; a remote browser client will not
    # hear it. Consider returning the clip through a gr.Audio output instead.
    try:
        play(AudioSegment.from_file(sound_path))
    except Exception:
        # Playback is a best-effort side effect: a missing file, decoder or
        # audio device must not discard an otherwise valid classification.
        _logger.exception("Could not play sound file %r", sound_path)


def classify_image(uploaded_image):
    """Classify an image and play the sound registered for its best label.

    Args:
        uploaded_image: The image to classify, passed straight to ``model``.
            ``None`` (nothing uploaded) is accepted and yields no predictions.

    Returns:
        A dict mapping each predicted label to its score; empty when there is
        no image or the model returns no predictions.
    """
    if uploaded_image is None:
        return {}

    predictions = model(uploaded_image)
    if not predictions:
        return {}

    # Pick the highest-scoring prediction explicitly rather than relying on
    # the order in which the pipeline lists its results.
    best = max(predictions, key=lambda prediction: prediction["score"])

    sound_path = _find_sound_path(best["label"])
    if sound_path is not None:
        _play_sound(sound_path)

    return {prediction["label"]: prediction["score"] for prediction in predictions}

# Build the Gradio interface: one PIL image in, the three best labels out.
# The title and description were stored as mis-decoded UTF-8 (mojibake); they
# are restored here to the intended Korean text.
iface = gr.Interface(
    fn=classify_image,
    inputs=gr.Image(type="pil"),
    outputs=gr.Label(num_top_classes=3),
    # "Image classification and sound playback"
    title="이미지 분류 및 사운드 재생",
    # "Upload an image and the app recognises the object and plays the
    # matching sound."
    description="이미지를 업로드하면, 사물을 인식하고 해당하는 음향을 재생합니다.",
)

# Start serving the interface.
iface.launch()