"""Gradio app: fuzzy search over the ``nyuuzyou/clker-svg`` SVG clip-art dataset.

At import time the dataset is streamed once and a small per-record dict
(title, tags, SVG markup, download URL) is kept in memory.  Queries are
matched against "title + tags" with RapidFuzz and rendered as an HTML gallery.
"""

import html
from typing import Optional, Tuple

import gradio as gr
from datasets import load_dataset
from rapidfuzz import fuzz, process, utils

# ──────────────────────────────────────────────────────────
# 1) Load dataset (streaming)
# ──────────────────────────────────────────────────────────
ds = load_dataset(
    "nyuuzyou/clker-svg",
    split="train",
    streaming=True,  # .jsonl.zst → streamed automatically
)

# Nullable fields are normalised to empty strings so that later string
# formatting never has to deal with None.
records = [
    {
        "id": ex["id"],
        "title": ex["title"] or "",
        "tags": " ".join(ex["tags"] or []),
        "svg": ex["svg_content"] or "",
        "url": ex["download_url"] or "",
    }
    for ex in ds
]

# Search corpus: record index ➜ single-line "title tags" string.
# Built once here because it does not depend on the query.
_CHOICES = {i: f"{r['title']} {r['tags']}" for i, r in enumerate(records)}

# Wrapper markup for the result grid; class names match the CSS below.
_GALLERY_START = '<div class="gallery-grid">'
_GALLERY_END = "</div>"


# ──────────────────────────────────────────────────────────
# 2) Search function
# ──────────────────────────────────────────────────────────
def _render_card(record: dict, score: float) -> str:
    """Return the HTML for one gallery card describing *record*."""
    # Text fields come from the dataset, so escape them before interpolation.
    title = html.escape(record["title"])
    tags = html.escape(record["tags"])
    url = html.escape(record["url"], quote=True)

    link = (
        f'<a class="download-link" href="{url}" target="_blank" '
        f'rel="noopener noreferrer">Download</a>'
        if url
        else ""
    )
    # NOTE(security): the SVG body is inlined unescaped on purpose (it has to
    # be markup to render).  This trusts the dataset content; sanitise it
    # here if the dataset cannot be trusted.
    return (
        '<div class="gallery-item">'
        f'<div class="svg-container">{record["svg"]}</div>'
        '<div class="item-details">'
        f"<h3>{title}</h3>"
        f'<div class="score">Match score: {score:.1f}</div>'
        f'<div class="tags">{tags}</div>'
        f"{link}"
        "</div>"
        "</div>"
    )


def search_svg(query: str, top_k: int) -> Tuple[str, Optional[str]]:
    """Fuzzy-search the loaded records and render the best matches.

    Args:
        query: Free-text search term typed by the user.
        top_k: Maximum number of results to show.

    Returns:
        A ``(message, gallery_html)`` pair.  ``message`` is a warning or
        notice for the Markdown component (empty on success) and
        ``gallery_html`` is the gallery markup, or ``None`` when there is
        nothing to show.
    """
    if not query or not query.strip():
        return "⚠️ Please enter a search term.", None

    # RapidFuzz returns (choice_text, score, key); the key is the record index.
    # The processor is passed explicitly so matching is case-insensitive
    # regardless of the installed RapidFuzz version's default.
    matched = process.extract(
        query,
        _CHOICES,
        scorer=fuzz.WRatio,
        processor=utils.default_process,
        limit=max(1, int(top_k)),
    )

    if not matched:
        return "No results found.", None

    cards = "".join(_render_card(records[idx], score) for _, score, idx in matched)
    return "", _GALLERY_START + cards + _GALLERY_END


# ──────────────────────────────────────────────────────────
# 3) Gradio UI
# ──────────────────────────────────────────────────────────
TITLE = "🔍 Clker SVG"
DESCRIPTION = """
This application lets you quickly search public-domain SVG clip art using the “nyuuzyou/clker-svg” dataset.
It finds similar items in titles and tags through fuzzy matching and shows them in a visual gallery.
"""

# NOTE(review): only the placeholder text "badge" is present here; the badge
# markup itself looks to be missing — confirm the intended HTML.
DISCORD_BADGE = """

badge

"""

CSS = """
.gallery-grid {
    display: grid;
    grid-template-columns: repeat(auto-fill, minmax(250px, 1fr));
    gap: 20px;
    margin-top: 20px;
}
.gallery-item {
    border: 1px solid #ddd;
    border-radius: 8px;
    overflow: hidden;
    box-shadow: 0 2px 5px rgba(0,0,0,0.1);
    transition: transform 0.3s;
    background: white;
}
.gallery-item:hover {
    transform: translateY(-5px);
    box-shadow: 0 5px 15px rgba(0,0,0,0.1);
}
.svg-container {
    height: 180px;
    display: flex;
    align-items: center;
    justify-content: center;
    padding: 10px;
    background: #f9f9f9;
}
.svg-container svg {
    max-width: 100%;
    max-height: 160px;
}
.item-details {
    padding: 15px;
}
.item-details h3 {
    margin: 0 0 10px 0;
    font-size: 16px;
    color: #333;
    white-space: nowrap;
    overflow: hidden;
    text-overflow: ellipsis;
}
.score {
    font-size: 14px;
    color: #666;
    margin-bottom: 5px;
}
.tags {
    font-size: 12px;
    color: #888;
    margin-bottom: 10px;
    white-space: nowrap;
    overflow: hidden;
    text-overflow: ellipsis;
}
.download-link {
    display: inline-block;
    padding: 5px 10px;
    background: #4a90e2;
    color: white;
    text-decoration: none;
    border-radius: 4px;
    font-size: 12px;
}
.download-link:hover {
    background: #3a7bc8;
}
"""

with gr.Blocks(title=TITLE, css=CSS) as demo:
    gr.Markdown(f"# {TITLE}")
    gr.Markdown(DESCRIPTION)
    gr.HTML(DISCORD_BADGE)

    with gr.Row():
        with gr.Column(scale=4):
            query_box = gr.Textbox(
                label="Search term",
                placeholder="e.g. cat, tree, house, computer, flower...",
                show_label=True,
            )
        with gr.Column(scale=1):
            top_slider = gr.Slider(
                minimum=1,
                maximum=50,
                value=12,
                step=3,
                label="Number of results",
            )

    with gr.Row():
        search_button = gr.Button("Search", variant="primary")

    warning_md = gr.Markdown()
    output_html = gr.HTML()

    # The button click and pressing Enter in the textbox run the same search.
    search_button.click(
        fn=search_svg,
        inputs=[query_box, top_slider],
        outputs=[warning_md, output_html],
    )
    query_box.submit(
        fn=search_svg,
        inputs=[query_box, top_slider],
        outputs=[warning_md, output_html],
    )

if __name__ == "__main__":
    demo.launch()