Yaron Koresh committed on
Commit
fc523d1
·
verified ·
1 Parent(s): d5d1663

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +126 -107
app.py CHANGED
@@ -27,16 +27,13 @@ from safetensors.torch import load_file, save_file
27
  from diffusers import DiffusionPipeline, AnimateDiffPipeline, MotionAdapter, EulerDiscreteScheduler, DDIMScheduler, StableDiffusionXLPipeline, UNet2DConditionModel, AutoencoderKL, UNet3DConditionModel
28
  #import jax
29
  #import jax.numpy as jnp
30
- from numba import njit as cpu1, jit as cpu2, cuda
31
  from numba.cuda import jit as gpu
32
 
33
  # optimization:
34
 
35
  # @gpu(cache=True)
36
- # @cpu1(cache=True,nopython=True,parallel=True)
37
- # @cpu2(cache=True,nopython=True,parallel=True)
38
- # @cpu1(cache=True)
39
- # @cpu2(cache=True)
40
 
41
  # logging
42
 
@@ -118,18 +115,19 @@ function custom(){
118
 
119
  # functionality
120
 
121
- @gpu(cache=True)
122
- # @cpu1(cache=True,nopython=True,parallel=True)
123
- # @cpu2(cache=True,nopython=True,parallel=True)
124
- # @cpu1(cache=True)
125
- # @cpu2(cache=True)
126
- def run(*args):
127
- tx = cuda.threadIdx.x
128
- bx = cuda.blockIdx.x
129
- dx = cuda.blockDim.x
130
- pos = tx + bx * dx
131
-
132
- cmd=args[0]
 
133
 
134
  result = subprocess.run(cmd, shell=True, capture_output=True, env=None)
135
  if result.returncode != 0:
@@ -139,18 +137,20 @@ def run(*args):
139
  sys.exit()
140
  return result
141
 
142
- @gpu(cache=True)
143
- # @cpu1(cache=True,nopython=True,parallel=True)
144
- # @cpu2(cache=True,nopython=True,parallel=True)
145
- # @cpu1(cache=True)
146
- # @cpu2(cache=True)
147
- def translate(*args):
148
- tx = cuda.threadIdx.x
149
- bx = cuda.blockIdx.x
150
- dx = cuda.blockDim.x
151
- pos = tx + bx * dx
152
-
153
- text,lang=args
 
 
154
 
155
  if text == None or lang == None:
156
  return ""
@@ -181,7 +181,7 @@ def translate(*args):
181
  translated = text
182
  try:
183
  src_lang = html.xpath('//*[@class="source-language"]')[0].text_content().lower().strip()
184
- trgt_lang = html.xpath('//*[@class="target-language"]')[0].text_content().lower().strip()
185
  src_text = html.xpath('//*[@id="tw-source-text"]/*')[0].text_content().lower().strip()
186
  trgt_text = html.xpath('//*[@id="tw-target-text"]/*')[0].text_content().lower().strip()
187
  if trgt_lang == lang:
@@ -192,34 +192,39 @@ def translate(*args):
192
  print(ret)
193
  return ret
194
 
195
- @gpu(cache=True)
196
- # @cpu1(cache=True,nopython=True,parallel=True)
197
- # @cpu2(cache=True,nopython=True,parallel=True)
198
- # @cpu1(cache=True)
199
- # @cpu2(cache=True)
200
- def generate_random_string(*args):
201
- tx = cuda.threadIdx.x
202
- bx = cuda.blockIdx.x
203
- dx = cuda.blockDim.x
204
- pos = tx + bx * dx
205
-
206
- length=args[0]
 
207
 
208
  characters = string.ascii_letters + string.digits
209
  return ''.join(random.choice(characters) for _ in range(length))
210
 
211
  @gpu(cache=True)
212
- # @cpu1(cache=True,nopython=True,parallel=True)
213
- # @cpu2(cache=True,nopython=True,parallel=True)
214
- # @cpu1(cache=True)
215
- # @cpu2(cache=True)
216
- def Piper(*args):
217
- tx = cuda.threadIdx.x
218
- bx = cuda.blockIdx.x
219
- dx = cuda.blockDim.x
220
- pos = tx + bx * dx
221
-
222
- image,positive,negative,motion=args
 
 
 
 
223
 
224
  global last_motion
225
  global ip_loaded
@@ -257,22 +262,23 @@ def Piper(*args):
257
  )
258
 
259
  @gpu(cache=True)
260
- # @cpu1(cache=True,nopython=True,parallel=True)
261
- # @cpu2(cache=True,nopython=True,parallel=True)
262
- # @cpu1(cache=True)
263
- # @cpu2(cache=True)
264
- def infer(args):
265
- tx = cuda.threadIdx.x
266
- bx = cuda.blockIdx.x
267
- dx = cuda.blockDim.x
268
- pos = tx + bx * dx
269
-
270
- pm = args[0]
 
271
 
272
  print("infer: started")
273
 
274
  p1 = pm["p"]
275
- name = generate_random_string[32,32](12)+".png"
276
 
277
  neg = pm["n"]
278
  if neg != "":
@@ -285,44 +291,50 @@ def infer(args):
285
 
286
  if pm["i"] == None:
287
  return None
288
- out = Piper[32,32](pm["i"],posi,neg,pm["m"])
289
  export_to_gif(out.frames[0],name,fps=fps)
290
  return name
291
 
292
- @gpu(cache=True)
293
- # @cpu1(cache=True,nopython=True,parallel=True)
294
- # @cpu2(cache=True,nopython=True,parallel=True)
295
- # @cpu1(cache=True)
296
- # @cpu2(cache=True)
297
- def handle(*args):
298
- tx = cuda.threadIdx.x
299
- bx = cuda.blockIdx.x
300
- dx = cuda.blockDim.x
301
- pos = tx + bx * dx
302
-
303
- i,m,p1,p2,*result=args
304
-
305
- p1_en = translate[32,32](p1,"english")
306
- p2_en = translate[32,32](p2,"english")
 
 
 
 
 
307
  pm = {"p":p1_en,"n":p2_en,"m":m,"i":i}
308
  ln = len(result)
309
  rng = list(range(ln))
310
  arr = [pm for _ in rng]
311
  #with Pool(f'{ ln }:ppn=2', queue='productionQ', timelimit='5:00:00', workdir='.') as pool:
312
  #return pool.map(infer,arr)
313
- ret = infer[32+ln,32](pm)
314
  return ret
315
 
316
- @gpu(cache=True)
317
- # @cpu1(cache=True,nopython=True,parallel=True)
318
- # @cpu2(cache=True,nopython=True,parallel=True)
319
- # @cpu1(cache=True)
320
- # @cpu2(cache=True)
321
  def ui():
322
- tx = cuda.threadIdx.x
323
- bx = cuda.blockIdx.x
324
- dx = cuda.blockDim.x
325
- pos = tx + bx * dx
 
 
 
326
 
327
  with gr.Blocks(theme=gr.themes.Soft(),css=css,js=js) as demo:
328
  with gr.Column(elem_id="col-container"):
@@ -372,20 +384,21 @@ def ui():
372
 
373
  gr.on(
374
  triggers=[run_button.click, prompt.submit, prompt2.submit],
375
- fn=handle[32,32],inputs=[img,motion,prompt,prompt2,*result],outputs=result
376
  )
377
  demo.queue().launch()
378
 
379
- @gpu(cache=True)
380
- # @cpu1(cache=True,nopython=True,parallel=True)
381
- # @cpu2(cache=True,nopython=True,parallel=True)
382
- # @cpu1(cache=True)
383
- # @cpu2(cache=True)
384
  def pre():
385
- tx = cuda.threadIdx.x
386
- bx = cuda.blockIdx.x
387
- dx = cuda.blockDim.x
388
- pos = tx + bx * dx
 
 
 
389
 
390
  pipe = AnimateDiffPipeline.from_pretrained(base, vae=vae, motion_adapter=adapter, torch_dtype=dtype).to(device)
391
  pipe.scheduler = DDIMScheduler(
@@ -402,15 +415,21 @@ def pre():
402
  pipe.enable_free_init(method="butterworth", use_fast_sampling=fast)
403
 
404
  # @gpu(cache=True)
405
- # @cpu1(cache=True,nopython=True,parallel=True)
406
- # @cpu2(cache=True,nopython=True,parallel=True)
407
- @cpu1(cache=True)
408
- # @cpu2(cache=True)
409
  def entry():
 
 
 
 
 
 
 
 
410
  os.chdir(os.path.abspath(os.path.dirname(__file__)))
411
  mp.set_start_method("spawn", force=True)
412
- pre[32,32]()
413
- ui[32,32]()
414
 
415
  # entry
416
 
 
27
  from diffusers import DiffusionPipeline, AnimateDiffPipeline, MotionAdapter, EulerDiscreteScheduler, DDIMScheduler, StableDiffusionXLPipeline, UNet2DConditionModel, AutoencoderKL, UNet3DConditionModel
28
  #import jax
29
  #import jax.numpy as jnp
30
+ from numba import jit as cpu, cuda
31
  from numba.cuda import jit as gpu
32
 
33
  # optimization:
34
 
35
  # @gpu(cache=True)
36
+ # @cpu(cache=True)
 
 
 
37
 
38
  # logging
39
 
 
115
 
116
  # functionality
117
 
118
+ # @gpu(cache=True)
119
+ @cpu(cache=True,nopython=True,parallel=True)
120
+ # @cpu(cache=True)
121
+ def run(cmd):
122
+ try:
123
+ tx = cuda.threadIdx.x
124
+ bx = cuda.blockIdx.x
125
+ dx = cuda.blockDim.x
126
+ pos = tx + bx * dx
127
+ except:
128
+ pos = 0
129
+
130
+ cmd=cmd[pos]
131
 
132
  result = subprocess.run(cmd, shell=True, capture_output=True, env=None)
133
  if result.returncode != 0:
 
137
  sys.exit()
138
  return result
139
 
140
+ # @gpu(cache=True)
141
+ @cpu(cache=True,nopython=True,parallel=True)
142
+ # @cpu(cache=True)
143
+ def translate(args):
144
+ try:
145
+ tx = cuda.threadIdx.x
146
+ bx = cuda.blockIdx.x
147
+ dx = cuda.blockDim.x
148
+ pos = tx + bx * dx
149
+ except:
150
+ pos = 0
151
+
152
+ text=text[pos]
153
+ lang=lang[pos]
154
 
155
  if text == None or lang == None:
156
  return ""
 
181
  translated = text
182
  try:
183
  src_lang = html.xpath('//*[@class="source-language"]')[0].text_content().lower().strip()
184
+ trgt_lang = html.xpath'//*[@class="target-language"]')[0].text_content().lower().strip()
185
  src_text = html.xpath('//*[@id="tw-source-text"]/*')[0].text_content().lower().strip()
186
  trgt_text = html.xpath('//*[@id="tw-target-text"]/*')[0].text_content().lower().strip()
187
  if trgt_lang == lang:
 
192
  print(ret)
193
  return ret
194
 
195
def generate_random_string(length):
    """Return a random string of ASCII letters and digits.

    Args:
        length: The number of characters to generate, given either as an
            ``int`` or as a sequence whose first element is that number
            (``infer`` calls this as ``generate_random_string([12])``).

    Returns:
        A ``str`` with that many characters, each drawn from
        ``string.ascii_letters + string.digits``. A count of zero or less
        yields the empty string.
    """
    # Deliberately plain Python: the body relies on ``random.choice`` over a
    # str and a generator-fed ``join``, which a nopython JIT cannot compile,
    # and the CUDA thread indices are only defined inside a CUDA kernel.
    # Outside a kernel the original's fallback index (0) is the only one that
    # applies, so the first element is taken directly.
    count = length if isinstance(length, int) else length[0]

    characters = string.ascii_letters + string.digits
    return ''.join(random.choice(characters) for _ in range(count))
211
 
212
  @gpu(cache=True)
213
+ # @cpu(cache=True,nopython=True,parallel=True)
214
+ # @cpu(cache=True)
215
+ def Piper(image,positive,negative,motion):
216
+ try:
217
+ tx = cuda.threadIdx.x
218
+ bx = cuda.blockIdx.x
219
+ dx = cuda.blockDim.x
220
+ pos = tx + bx * dx
221
+ except:
222
+ pos = 0
223
+
224
+ image=image[pos]
225
+ positive=positive[pos]
226
+ negative=negative[pos]
227
+ motion=motion[pos]
228
 
229
  global last_motion
230
  global ip_loaded
 
262
  )
263
 
264
  @gpu(cache=True)
265
+ # @cpu(cache=True,nopython=True,parallel=True)
266
+ # @cpu(cache=True)
267
+ def infer(pm):
268
+ try:
269
+ tx = cuda.threadIdx.x
270
+ bx = cuda.blockIdx.x
271
+ dx = cuda.blockDim.x
272
+ pos = tx + bx * dx
273
+ except:
274
+ pos = 0
275
+
276
+ pm = pm[pos]
277
 
278
  print("infer: started")
279
 
280
  p1 = pm["p"]
281
+ name = generate_random_string([12])+".png"
282
 
283
  neg = pm["n"]
284
  if neg != "":
 
291
 
292
  if pm["i"] == None:
293
  return None
294
+ out = Piper[32,32]([pm["i"]],[posi],[neg],[pm["m"]])
295
  export_to_gif(out.frames[0],name,fps=fps)
296
  return name
297
 
298
def handle(i, m, p1, p2, result):
    """Translate both prompts to English and dispatch inference.

    Every argument is a sequence; only its first element is used.

    Args:
        i: Sequence holding the input image (stored under key ``"i"``).
        m: Sequence holding the motion selection (stored under key ``"m"``).
        p1: Sequence holding the positive prompt text.
        p2: Sequence holding the negative prompt text.
        result: Sequence holding the collection of output slots; its length
            decides how many copies of the parameter dict are dispatched.

    Returns:
        Whatever the ``infer`` launch evaluates to.
    """
    # Deliberately plain Python: a nopython JIT cannot type a dict with mixed
    # value types or the calls below, and CUDA thread indices are only defined
    # inside a CUDA kernel. Outside a kernel the original's fallback index (0)
    # is the only one that applies.
    pos = 0

    i = i[pos]
    m = m[pos]
    p1 = p1[pos]
    p2 = p2[pos]
    result = result[pos]

    # NOTE(review): `translate` is declared with a single parameter in this
    # revision; confirm that this two-argument call matches its signature.
    p1_en = translate([p1], ["english"])
    p2_en = translate([p2], ["english"])

    pm = {"p": p1_en, "n": p2_en, "m": m, "i": i}
    ln = len(result)
    # One reference to the same parameter dict per output slot.
    arr = [pm] * ln

    # NOTE(review): `infer` is decorated as a CUDA kernel and is launched with
    # an explicit [blocks, threads] configuration. Numba CUDA kernels cannot
    # return a value, so `ret` is presumably always None; confirm against the
    # `infer` definition before relying on it.
    ret = infer[32 + ln, 32](arr)
    return ret
326
 
327
+ # @gpu(cache=True)
328
+ # @cpu(cache=True,nopython=True,parallel=True)
329
+ @cpu(cache=True)
 
 
330
  def ui():
331
+ try:
332
+ tx = cuda.threadIdx.x
333
+ bx = cuda.blockIdx.x
334
+ dx = cuda.blockDim.x
335
+ pos = tx + bx * dx
336
+ except:
337
+ pos = 0
338
 
339
  with gr.Blocks(theme=gr.themes.Soft(),css=css,js=js) as demo:
340
  with gr.Column(elem_id="col-container"):
 
384
 
385
  gr.on(
386
  triggers=[run_button.click, prompt.submit, prompt2.submit],
387
+ fn=handle,inputs=[[img],[motion],[prompt],[prompt2],[result]],outputs=result
388
  )
389
  demo.queue().launch()
390
 
391
+ # @gpu(cache=True)
392
+ # @cpu(cache=True,nopython=True,parallel=True)
393
+ @cpu(cache=True)
 
 
394
  def pre():
395
+ try:
396
+ tx = cuda.threadIdx.x
397
+ bx = cuda.blockIdx.x
398
+ dx = cuda.blockDim.x
399
+ pos = tx + bx * dx
400
+ except:
401
+ pos = 0
402
 
403
  pipe = AnimateDiffPipeline.from_pretrained(base, vae=vae, motion_adapter=adapter, torch_dtype=dtype).to(device)
404
  pipe.scheduler = DDIMScheduler(
 
415
  pipe.enable_free_init(method="butterworth", use_fast_sampling=fast)
416
 
417
  # @gpu(cache=True)
418
+ # @cpu(cache=True,nopython=True,parallel=True)
419
def entry():
    """Prepare the process and start the application.

    Changes the working directory to the directory that contains this file,
    forces the ``mp`` start method to ``"spawn"``, then calls ``pre()``
    followed by ``ui()``.
    """
    # Deliberately not JIT-compiled: the body is nothing but OS and process
    # setup plus two calls, so there is no numeric work to accelerate. The
    # CUDA thread-index lookup was dropped because its result was never used.
    os.chdir(os.path.abspath(os.path.dirname(__file__)))
    mp.set_start_method("spawn", force=True)
    pre()
    ui()
433
 
434
  # entry
435