freddyaboulton (HF Staff) committed (verified)
Commit e641fd0 · Parent(s): d049777

Create app.py

Files changed (1): app.py (+244 -0)
app.py ADDED
@@ -0,0 +1,244 @@
import gradio as gr
import os

_docs = {'WebRTC':
{'description': 'Stream audio/video with WebRTC',
'members': {'__init__':
{
'rtc_configuration': {'type': 'dict[str, Any] | None', 'default': 'None', 'description': "The configuration dictionary to pass to the RTCPeerConnection constructor. If None, the default configuration is used."},
'height': {'type': 'int | str | None', 'default': 'None', 'description': 'The height of the component, specified in pixels if a number is passed, or in CSS units if a string is passed. This has no effect on the preprocessed video file, but will affect the displayed video.'},
'width': {'type': 'int | str | None', 'default': 'None', 'description': 'The width of the component, specified in pixels if a number is passed, or in CSS units if a string is passed. This has no effect on the preprocessed video file, but will affect the displayed video.'},
'label': {'type': 'str | None', 'default': 'None', 'description': 'the label for this component. Appears above the component and is also used as the header if there is a table of examples for this component. If None and used in a `gr.Interface`, the label will be the name of the parameter this component is assigned to.'},
'show_label': {'type': 'bool | None', 'default': 'None', 'description': 'if True, will display label.'}, 'container': {'type': 'bool', 'default': 'True', 'description': 'if True, will place the component in a container - providing some extra padding around the border.'},
'scale': {'type': 'int | None', 'default': 'None', 'description': 'relative size compared to adjacent Components. For example if Components A and B are in a Row, and A has scale=2, and B has scale=1, A will be twice as wide as B. Should be an integer. scale applies in Rows, and to top-level Components in Blocks where fill_height=True.'},
'min_width': {'type': 'int', 'default': '160', 'description': 'minimum pixel width, will wrap if not sufficient screen space to satisfy this value. If a certain scale value results in this Component being narrower than min_width, the min_width parameter will be respected first.'},
'interactive': {'type': 'bool | None', 'default': 'None', 'description': 'if True, will allow users to upload a video; if False, can only be used to display videos. If not provided, this is inferred based on whether the component is used as an input or output.'}, 'visible': {'type': 'bool', 'default': 'True', 'description': 'if False, component will be hidden.'},
'elem_id': {'type': 'str | None', 'default': 'None', 'description': 'an optional string that is assigned as the id of this component in the HTML DOM. Can be used for targeting CSS styles.'},
'elem_classes': {'type': 'list[str] | str | None', 'default': 'None', 'description': 'an optional list of strings that are assigned as the classes of this component in the HTML DOM. Can be used for targeting CSS styles.'},
'render': {'type': 'bool', 'default': 'True', 'description': 'if False, component will not be rendered in the Blocks context. Should be used if the intention is to assign event listeners now but render the component later.'},
'key': {'type': 'int | str | None', 'default': 'None', 'description': 'if assigned, will be used to assume identity across a re-render. Components that have the same key across a re-render will have their value preserved.'},
'mirror_webcam': {'type': 'bool', 'default': 'True', 'description': 'if True, the webcam will be mirrored. Default is True.'},
},
'events': {'tick': {'type': None, 'default': None, 'description': ''}}}, '__meta__': {'additional_interfaces': {}, 'user_fn_refs': {'WebRTC': []}}}
}


abs_path = os.path.join(os.path.dirname(__file__), "css.css")

with gr.Blocks(
    css_paths=abs_path,
    theme=gr.themes.Default(
        font_mono=[
            gr.themes.GoogleFont("Inconsolata"),
            "monospace",
        ],
    ),
) as demo:
    gr.Markdown(
"""
<h1 style='text-align: center; margin-bottom: 1rem'> Gradio WebRTC ⚡️ </h1>

<div style="display: flex; flex-direction: row; justify-content: center">
<img style="display: block; padding-right: 5px; height: 20px;" alt="Static Badge" src="https://img.shields.io/badge/version%20-%200.0.5%20-%20orange">
<a href="https://github.com/freddyaboulton/gradio-webrtc" target="_blank"><img alt="Static Badge" src="https://img.shields.io/badge/github-white?logo=github&logoColor=black"></a>
</div>
""", elem_classes=["md-custom"], header_links=True)
    gr.Markdown(
"""
## Installation

```bash
pip install gradio_webrtc
```

## Examples:
1. [Object Detection from Webcam with YOLOv10](https://huggingface.co/spaces/freddyaboulton/webrtc-yolov10n) 📷
2. [Streaming Object Detection from Video with RT-DETR](https://huggingface.co/spaces/freddyaboulton/rt-detr-object-detection-webrtc) 🎥
3. [Text-to-Speech](https://huggingface.co/spaces/freddyaboulton/parler-tts-streaming-webrtc) 🗣️

## Usage

The WebRTC component supports the following three use cases:
1. Streaming video from the user webcam to the server and back
2. Streaming video from the server to the client
3. Streaming audio from the server to the client

Streaming audio from the client to the server and back (conversational AI) is not yet supported.

## Streaming Video from the User Webcam to the Server and Back

```python
import gradio as gr
from gradio_webrtc import WebRTC


def detection(image, conf_threshold=0.3):
    # ... your detection code here ...
    return image


with gr.Blocks() as demo:
    image = WebRTC(label="Stream", mode="send-receive", modality="video")
    conf_threshold = gr.Slider(
        label="Confidence Threshold",
        minimum=0.0,
        maximum=1.0,
        step=0.05,
        value=0.30,
    )
    image.stream(
        fn=detection,
        inputs=[image, conf_threshold],
        outputs=[image], time_limit=10
    )

if __name__ == "__main__":
    demo.launch()
```
* Set the `mode` parameter to `"send-receive"` and `modality` to `"video"`.
* The `stream` event's `fn` parameter is a function that receives the next frame from the webcam as a **numpy array** and returns the processed frame, also as a **numpy array** (see the sketch after this list).
* Numpy arrays are in (height, width, 3) format, with the color channels in RGB order.
* The `inputs` parameter should be a list whose first element is the WebRTC component. The only output allowed is the WebRTC component.
* The `time_limit` parameter is the maximum time in seconds the video stream will run. When the time limit is reached, the video stream stops.

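To make the in/out contract concrete, here is a minimal sketch of a `detection` function (an illustrative stand-in, not the YOLO model from the demo Space) that simply mirrors each frame:

```python
import numpy as np

def detection(image: np.ndarray, conf_threshold: float = 0.3) -> np.ndarray:
    # `image` arrives as an RGB array of shape (height, width, 3).
    # A real detector would run a model here and keep only predictions
    # whose confidence exceeds `conf_threshold`; flipping the frame
    # horizontally just makes the round trip visible.
    return np.fliplr(image)
```
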
## Streaming Video from the Server to the Client

```python
import gradio as gr
from gradio_webrtc import WebRTC
import cv2

def generation():
    url = "https://download.tsi.telecom-paristech.fr/gpac/dataset/dash/uhd/mux_sources/hevcds_720p30_2M.mp4"
    cap = cv2.VideoCapture(url)
    iterating = True
    while iterating:
        iterating, frame = cap.read()
        if iterating:  # skip the final failed read, which returns no frame
            yield frame

with gr.Blocks() as demo:
    output_video = WebRTC(label="Video Stream", mode="receive", modality="video")
    button = gr.Button("Start", variant="primary")
    output_video.stream(
        fn=generation, inputs=None, outputs=[output_video],
        trigger=button.click
    )

if __name__ == "__main__":
    demo.launch()
```

* Set the `mode` parameter to `"receive"` and `modality` to `"video"`.
* The `stream` event's `fn` parameter is a generator function that yields the next frame from the video as a **numpy array**.
* The only output allowed is the WebRTC component.
* The `trigger` parameter is the Gradio event that starts the WebRTC connection; in this case, the button's click event.

## Streaming Audio from the Server to the Client

```python
import gradio as gr
import numpy as np
from gradio_webrtc import WebRTC
from pydub import AudioSegment

def generation(num_steps):
    for _ in range(num_steps):
        # Replace this with a path to an audio file on your machine.
        segment = AudioSegment.from_file("/Users/freddy/sources/gradio/demo/audio_debugger/cantina.wav")
        yield (segment.frame_rate, np.array(segment.get_array_of_samples()).reshape(1, -1))

with gr.Blocks() as demo:
    audio = WebRTC(label="Stream", mode="receive", modality="audio")
    num_steps = gr.Slider(
        label="Number of Steps",
        minimum=1,
        maximum=10,
        step=1,
        value=5,
    )
    button = gr.Button("Generate")

    audio.stream(
        fn=generation, inputs=[num_steps], outputs=[audio],
        trigger=button.click
    )

if __name__ == "__main__":
    demo.launch()
```

* Set the `mode` parameter to `"receive"` and `modality` to `"audio"`.
* The `stream` event's `fn` parameter is a generator function that yields the next audio segment as a tuple of (frame_rate, audio_samples).
* The numpy array should be of shape (1, num_samples); a file-free sketch follows this list.
* The `outputs` parameter should be a list with the WebRTC component as the only element.

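As an illustration of that tuple format, here is a minimal sketch (not part of the library) that synthesizes sine-wave chunks instead of reading a file, assuming 16-bit integer samples like the pydub example above:

```python
import numpy as np

def tone_generator(num_steps, sample_rate=24000, freq=440.0):
    t = np.arange(sample_rate) / sample_rate  # one second of timestamps
    chunk = (0.3 * np.sin(2 * np.pi * freq * t) * 32767).astype(np.int16)
    for _ in range(num_steps):
        # Yield (frame_rate, samples) with samples shaped (1, num_samples).
        yield (sample_rate, chunk.reshape(1, -1))
```
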
## Deployment

When deploying in a cloud environment (such as Hugging Face Spaces or EC2), you need to set up a TURN server to relay the WebRTC traffic.
The easiest way to do this is to use a service like Twilio.

```python
import os

import gradio as gr
from gradio_webrtc import WebRTC
from twilio.rest import Client

account_sid = os.environ.get("TWILIO_ACCOUNT_SID")
auth_token = os.environ.get("TWILIO_AUTH_TOKEN")

client = Client(account_sid, auth_token)

token = client.tokens.create()

rtc_configuration = {
    "iceServers": token.ice_servers,
    "iceTransportPolicy": "relay",
}

with gr.Blocks() as demo:
    ...
    rtc = WebRTC(rtc_configuration=rtc_configuration, ...)
    ...
```
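For quick experiments before wiring up Twilio, a plain STUN configuration can be passed the same way. This is only a sketch: a STUN server helps peers discover their public addresses but cannot relay media, so it may fail behind strict NATs where a TURN relay would succeed.

```python
# Assumption for illustration: Google's public STUN server.
rtc_configuration = {
    "iceServers": [{"urls": "stun:stun.l.google.com:19302"}],
}
```
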
""", elem_classes=["md-custom"], header_links=True)


    gr.Markdown("""
##
""", elem_classes=["md-custom"], header_links=True)

    gr.ParamViewer(value=_docs["WebRTC"]["members"]["__init__"], linkify=[])


    demo.load(None, js=r"""function() {
    const refs = {};
    const user_fn_refs = {
        WebRTC: [], };
    requestAnimationFrame(() => {

        Object.entries(user_fn_refs).forEach(([key, refs]) => {
            if (refs.length > 0) {
                const el = document.querySelector(`.${key}-user-fn`);
                if (!el) return;
                refs.forEach(ref => {
                    el.innerHTML = el.innerHTML.replace(
                        new RegExp("\\b"+ref+"\\b", "g"),
                        `<a href="#h-${ref.toLowerCase()}">${ref}</a>`
                    );
                })
            }
        })

        Object.entries(refs).forEach(([key, refs]) => {
            if (refs.length > 0) {
                const el = document.querySelector(`.${key}`);
                if (!el) return;
                refs.forEach(ref => {
                    el.innerHTML = el.innerHTML.replace(
                        new RegExp("\\b"+ref+"\\b", "g"),
                        `<a href="#h-${ref.toLowerCase()}">${ref}</a>`
                    );
                })
            }
        })
    })
}
""")

demo.launch()