File size: 3,552 Bytes
f83e64b
1
{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "302934307671667531413257853548643485645",
   "metadata": {},
   "source": [
    "# Gradio Demo: yolov10_webcam_stream\n",
    "\n",
    "Streams frames from a WebRTC component through a YOLOv10 ONNX model and sends the processed frames back to the same component."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "272996653310673477252411125948039410165",
   "metadata": {},
   "outputs": [],
   "source": [
    "# The gradio version range is quoted so the shell does not treat `>` and `<` as redirections.\n",
    "!pip install -q \"gradio>=5.0,<6.0\" safetensors==0.4.3 opencv-python twilio gradio-webrtc==0.0.1 onnxruntime-gpu"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "288918539441861185822528903084949547379",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Downloading files from the demo repo\n",
    "import os\n",
    "!wget -q https://github.com/gradio-app/gradio/raw/main/demo/yolov10_webcam_stream/inference.py\n",
    "!wget -q https://github.com/gradio-app/gradio/raw/main/demo/yolov10_webcam_stream/utils.py"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "44380577570523278879349135829904343037",
   "metadata": {},
   "outputs": [],
   "source": [
    "import gradio as gr\n",
    "import cv2  # type: ignore\n",
    "from huggingface_hub import hf_hub_download\n",
    "from gradio_webrtc import WebRTC  # type: ignore\n",
    "from twilio.rest import Client  # type: ignore\n",
    "import os\n",
    "from inference import YOLOv10  # type: ignore\n",
    "\n",
    "# Fetch the ONNX model file from the Hugging Face Hub and pass it to the detector.\n",
    "model_file = hf_hub_download(\n",
    "    repo_id=\"onnx-community/yolov10n\", filename=\"onnx/model.onnx\"\n",
    ")\n",
    "\n",
    "model = YOLOv10(model_file)\n",
    "\n",
    "# Twilio credentials are optional and are read from the environment, never hard-coded.\n",
    "account_sid = os.environ.get(\"TWILIO_ACCOUNT_SID\")\n",
    "auth_token = os.environ.get(\"TWILIO_AUTH_TOKEN\")\n",
    "\n",
    "if account_sid and auth_token:\n",
    "    # With credentials: use the ICE servers from a Twilio token and a relay-only transport policy.\n",
    "    client = Client(account_sid, auth_token)\n",
    "\n",
    "    token = client.tokens.create()\n",
    "\n",
    "    rtc_configuration = {\n",
    "        \"iceServers\": token.ice_servers,\n",
    "        \"iceTransportPolicy\": \"relay\",\n",
    "    }\n",
    "else:\n",
    "    # Without credentials: the WebRTC component receives no explicit configuration.\n",
    "    rtc_configuration = None\n",
    "\n",
    "\n",
    "def detection(image, conf_threshold=0.3):\n",
    "    \"\"\"Run the YOLOv10 model on one streamed frame.\n",
    "\n",
    "    Args:\n",
    "        image: Frame delivered by the WebRTC component; it is passed straight to\n",
    "            ``cv2.resize`` (presumably a NumPy image array -- confirm against gradio_webrtc).\n",
    "        conf_threshold: Confidence threshold forwarded to ``model.detect_objects``.\n",
    "\n",
    "    Returns:\n",
    "        The result of ``model.detect_objects`` resized to 500x500\n",
    "        (presumably the frame with detections drawn on it -- see inference.py).\n",
    "    \"\"\"\n",
    "    # Resize the frame to the input size the model reports.\n",
    "    image = cv2.resize(image, (model.input_width, model.input_height))\n",
    "    new_image = model.detect_objects(image, conf_threshold)\n",
    "    return cv2.resize(new_image, (500, 500))\n",
    "\n",
    "\n",
    "# NOTE(review): `max-height: 600` has no unit and the final `;` sits after the closing\n",
    "# brace; browsers likely ignore both. Left unchanged -- confirm the intended CSS.\n",
    "css = \"\"\".my-group {max-width: 600px !important; max-height: 600 !important;}\n",
    "                      .my-column {display: flex !important; justify-content: center !important; align-items: center !important};\"\"\"\n",
    "\n",
    "\n",
    "with gr.Blocks(css=css) as demo:\n",
    "    gr.HTML(\n",
    "        \"\"\"\n",
    "    <h1 style='text-align: center'>\n",
    "    YOLOv10 Webcam Stream (Powered by WebRTC \u26a1\ufe0f)\n",
    "    </h1>\n",
    "    \"\"\"\n",
    "    )\n",
    "    gr.HTML(\n",
    "        \"\"\"\n",
    "        <h3 style='text-align: center'>\n",
    "        <a href='https://arxiv.org/abs/2405.14458' target='_blank'>arXiv</a> | <a href='https://github.com/THU-MIG/yolov10' target='_blank'>github</a>\n",
    "        </h3>\n",
    "        \"\"\"\n",
    "    )\n",
    "    with gr.Column(elem_classes=[\"my-column\"]):\n",
    "        with gr.Group(elem_classes=[\"my-group\"]):\n",
    "            image = WebRTC(label=\"Stream\", rtc_configuration=rtc_configuration)\n",
    "            conf_threshold = gr.Slider(\n",
    "                label=\"Confidence Threshold\",\n",
    "                minimum=0.0,\n",
    "                maximum=1.0,\n",
    "                step=0.05,\n",
    "                value=0.30,\n",
    "            )\n",
    "\n",
    "        # The stream component is both input and output: each frame goes through\n",
    "        # `detection` together with the current slider value.\n",
    "        image.stream(\n",
    "            fn=detection, inputs=[image, conf_threshold], outputs=[image], time_limit=10\n",
    "        )\n",
    "\n",
    "if __name__ == \"__main__\":\n",
    "    demo.launch()\n"
   ]
  }
 ],
 "metadata": {},
 "nbformat": 4,
 "nbformat_minor": 5
}