File size: 3,243 Bytes
f799edd
 
 
 
c83bfa7
 
 
 
 
 
 
 
f799edd
 
 
 
 
 
 
 
 
 
 
 
 
c83bfa7
f799edd
c83bfa7
 
 
f799edd
 
c83bfa7
 
 
 
f799edd
 
 
 
c83bfa7
f799edd
 
 
c83bfa7
f799edd
 
c83bfa7
f799edd
 
 
 
c83bfa7
 
f799edd
 
 
 
 
c83bfa7
 
 
f799edd
 
 
c83bfa7
 
 
 
f799edd
c83bfa7
f799edd
c83bfa7
 
f799edd
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import gradio as gr
from datasets import load_dataset
from rapidfuzz import process, fuzz

# ──────────────────────────────────────────────────────────
# 1) Load the dataset (streaming) and keep the searchable records in memory
# ──────────────────────────────────────────────────────────
# streaming=True iterates the split lazily instead of downloading it up front.
ds = load_dataset("nyuuzyou/clker-svg", split="train", streaming=True)

# Consume the whole stream once at import time. Each record keeps the fields
# the search UI needs, including the full SVG markup; a missing title becomes
# "" and a missing tag list becomes an empty string.
records = [
    {
        "id": example["id"],
        "title": example["title"] or "",
        "tags": " ".join(example["tags"] or []),
        "svg": example["svg_content"],
        "url": example["download_url"],
    }
    for example in ds
]

# ──────────────────────────────────────────────────────────
# 2) 검색 함수
# ──────────────────────────────────────────────────────────
def search_svg(query: str, top_k: int):
    """Fuzzy-search the loaded clipart records and render the best matches.

    Each record's title and tags are joined into one string and scored against
    ``query`` with RapidFuzz ``WRatio``. The ``top_k`` best records are
    rendered as bordered cards showing title, score, tags, a download link and
    the inline SVG.

    Args:
        query: Free-text search string. ``None``, empty, or whitespace-only
            input yields the warning message instead of running a search.
        top_k: Maximum number of results; coerced with ``int()`` so a float
            value is accepted.

    Returns:
        A ``(warning, result_html)`` tuple: ``(message, None)`` when no usable
        query was given, otherwise ``("", result_html)`` where ``result_html``
        is the result cards joined by newlines.
    """
    import html  # local import keeps this function self-contained

    # Guard against None as well as blank text so a missing value shows the
    # warning instead of raising AttributeError on .strip().
    if not query or not query.strip():
        return "⚠️ 검색어를 입력하세요.", None

    from rapidfuzz import utils  # same package the file already depends on

    # choices: record index (int) -> "title tags" on a single line
    choices = {i: f"{r['title']} {r['tags']}" for i, r in enumerate(records)}

    # For a mapping, RapidFuzz yields (choice_text, score, key) tuples.
    # RapidFuzz 3.x applies no preprocessing by default, so pass the standard
    # processor explicitly to make matching case- and punctuation-insensitive.
    matched = process.extract(
        query,
        choices,
        scorer=fuzz.WRatio,
        processor=utils.default_process,
        limit=int(top_k),
    )

    html_snippets = []
    for _choice_text, score, idx in matched:  # idx is the index into records
        r = records[idx]
        # Dataset text is not trusted markup: escape it before interpolating
        # into HTML (quote=True also protects the single-quoted href).
        title = html.escape(r["title"])
        tags = html.escape(r["tags"])
        url = html.escape(str(r["url"]), quote=True)
        svg_html = (
            "<div style='border:1px solid #ddd;margin:8px;padding:8px'>"
            f"<strong>{title}</strong> "
            f"(score {score})<br>"
            f"<em>{tags}</em><br>"
            f"<a href='{url}' target='_blank'>원본&nbsp;다운로드</a><br>"
            # NOTE(review): the SVG is embedded verbatim so the browser renders
            # it inline; this trusts the dataset's markup — consider
            # sanitising it if the source is not fully trusted.
            f"{r['svg']}"
            "</div>"
        )
        html_snippets.append(svg_html)

    return "", "\n".join(html_snippets)

# ──────────────────────────────────────────────────────────
# 3) Gradio UI
# ──────────────────────────────────────────────────────────
# Page layout: a heading, one row holding the query box and the result-count
# slider, then a Markdown slot and an HTML slot that are filled by search_svg.
with gr.Blocks(title="Clker SVG 검색") as demo:
    gr.Markdown("## 🔍 Clker.com SVG Public-Domain Clipart 검색")
    with gr.Row():
        # Search text plus a slider choosing how many results to show
        # (1 to 50 in steps of 1, default 10).
        query_box = gr.Textbox(label="검색어", placeholder="예: cat, tree, ...")
        top_slider = gr.Slider(1, 50, value=10, step=1, label="결과 개수")
    # Receives the first value returned by search_svg (the warning text, or ""
    # when the search ran).
    warning_md = gr.Markdown()
    # Receives the second value returned by search_svg (the rendered result
    # cards, or None when only a warning is shown).
    output_html = gr.HTML()

    # Submitting the textbox runs search_svg with the current query and slider
    # value; its two return values map, in order, onto the two outputs.
    query_box.submit(
        fn=search_svg,
        inputs=[query_box, top_slider],
        outputs=[warning_md, output_html],
    )

# Launch the app only when this file is executed directly as a script.
if __name__ == "__main__":
    demo.launch()