assentian1970 committed
Commit 2d17b60 (verified)
1 Parent(s): bead986

Update app.py

Files changed (1)
  1. app.py +523 -11
app.py CHANGED
@@ -1,4 +1,14 @@
1
- import gradio as gr
2
  from datetime import datetime
3
  import torch
4
  from transformers import AutoModel, AutoTokenizer
@@ -14,17 +24,9 @@ from ultralytics import YOLO
14
  import numpy as np
15
  import cv2
16
 
17
- # Add this after other model configurations
18
- YOLO_MODEL = YOLO('best_yolov11.pt') # Load YOLOv11 model
19
-
20
- # Check if CUDA is available
21
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
22
-
23
- # Initialize GPU if available
24
- if DEVICE == "cuda":
25
- def debug():
26
- torch.randn(10).cuda()
27
- debug()
28
 
29
  # File type validation
30
  IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.webp'}
@@ -55,6 +57,516 @@ except Exception as e:
55
 
56
  MAX_NUM_FRAMES = 32
57

58
  def load_model_and_tokenizer():
59
  """Load a fresh instance of the model and tokenizer"""
60
  try:
 
1
+ #!/usr/bin/env python
2
+ # encoding: utf-8
3
+ import spaces
4
+ import torch
5
+
6
+ # GPU initialization using Spaces decorator
7
+ @spaces.GPU
8
+ def debug():
9
+ torch.randn(10).cuda()
10
+ debug()
11
+
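The warm-up above touches CUDA once at import time. On ZeroGPU Spaces, @spaces.GPU is more commonly placed on the functions that actually run inference, so a GPU is attached for the duration of each call; a minimal sketch of that pattern (the duration argument and the run_inference name are illustrative, not taken from this commit):

    import spaces
    import torch

    @spaces.GPU(duration=120)  # request a GPU for up to ~120 s per call; a bare @spaces.GPU also works
    def run_inference(prompt):
        # hypothetical entry point: anything that touches CUDA runs while the GPU is attached
        x = torch.randn(8, device="cuda")
        return f"{prompt}: sum={x.sum().item():.3f}"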
12
  from datetime import datetime
13
  import torch
14
  from transformers import AutoModel, AutoTokenizer
 
24
  import numpy as np
25
  import cv2
26
 
27
+ # Load the YOLOv11 model and move it to the detected device
28
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
29
+ YOLO_MODEL = YOLO('best_yolov11.pt')
+ YOLO_MODEL.to(DEVICE)  # move the model to the selected device
30
 
31
  # File type validation
32
  IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.webp'}
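Ultralytics also accepts the target device and a confidence threshold per prediction call, which can stand in for moving the model at load time and for the manual conf > 0.5 checks used later in this file; a small sketch under that assumption (the zero frame is only a placeholder):

    import numpy as np

    frame = np.zeros((480, 640, 3), dtype=np.uint8)  # dummy BGR frame standing in for a cv2 capture
    results = YOLO_MODEL(frame, device=DEVICE, conf=0.5, verbose=False)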
 
57
 
58
  MAX_NUM_FRAMES = 32
59
 
60
+ def load_model_and_tokenizer():
61
+ """Load a fresh instance of the model and tokenizer"""
62
+ try:
63
+ # Clear GPU memory if using CUDA
64
+ if DEVICE == "cuda":
65
+ torch.cuda.empty_cache()
66
+ gc.collect()
67
+
68
+ model = AutoModel.from_pretrained(
69
+ model_path,
70
+ attn_implementation='sdpa',
71
+ trust_remote_code=True,
72
+ torch_dtype=torch.bfloat16 if DEVICE == "cuda" else torch.float32,
73
+ device_map='auto'
74
+ )
75
+
76
+ tokenizer = AutoTokenizer.from_pretrained(
77
+ model_path,
78
+ trust_remote_code=True
79
+ )
80
+ model.eval()
81
+ processor = model.init_processor(tokenizer)
82
+ return model, tokenizer, processor
83
+ except Exception as e:
84
+ print(f"Error loading model: {str(e)}")
85
+ raise
86
+
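Not every CUDA device supports bfloat16 (pre-Ampere GPUs in particular); an optional guard, sketched with standard torch APIs only, could pick the dtype before calling from_pretrained:

    if DEVICE == "cuda" and torch.cuda.is_bf16_supported():
        dtype = torch.bfloat16
    elif DEVICE == "cuda":
        dtype = torch.float16  # fallback for GPUs without bfloat16 support
    else:
        dtype = torch.float32

dtype would then replace the inline torch.bfloat16/torch.float32 expression passed as torch_dtype.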
87
+ def process_video_chunk(video_frames, model, tokenizer, processor, prompt):
88
+ """Process a chunk of video frames with mPLUG model"""
89
+ messages = [
90
+ {
91
+ "role": "user",
92
+ "content": prompt,
93
+ "video_frames": video_frames
94
+ }
95
+ ]
96
+
97
+ model_messages = []
98
+ videos = []
99
+
100
+ for msg in messages:
101
+ content_str = msg["content"]
102
+ if "video_frames" in msg and msg["video_frames"]:
103
+ content_str += "<|video|>"
104
+ videos.append(msg["video_frames"])
105
+ model_messages.append({
106
+ "role": msg["role"],
107
+ "content": content_str
108
+ })
109
+
110
+ model_messages.append({
111
+ "role": "assistant",
112
+ "content": ""
113
+ })
114
+
115
+ inputs = processor(
116
+ model_messages,
117
+ images=None,
118
+ videos=videos if videos else None
119
+ )
120
+ # Use DEVICE variable so that CPU-only environments aren’t forced to cuda
121
+ inputs.to(DEVICE)
122
+ inputs.update({
123
+ 'tokenizer': tokenizer,
124
+ 'max_new_tokens': 100,
125
+ 'decode_text': True,
126
+ })
127
+
128
+ response = model.generate(**inputs)
129
+ return response[0]
130
+
131
+ def encode_video_in_chunks(video_path):
132
+ """Extract frames from a video in chunks"""
133
+ vr = VideoReader(video_path, ctx=cpu(0))
134
+ sample_fps = round(vr.get_avg_fps() / 1) # 1 FPS
135
+ frame_idx = [i for i in range(0, len(vr), sample_fps)]
136
+
137
+ # Split frame indices into chunks
138
+ chunks = [
139
+ frame_idx[i:i + MAX_NUM_FRAMES]
140
+ for i in range(0, len(frame_idx), MAX_NUM_FRAMES)
141
+ ]
142
+
143
+ for chunk_idx, chunk in enumerate(chunks):
144
+ frames = vr.get_batch(chunk).asnumpy()
145
+ frames = [Image.fromarray(v.astype('uint8')) for v in frames]
146
+ yield chunk_idx, frames
147
+
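As a quick check of the sampling arithmetic above (illustrative numbers, not from the commit): a 30 fps clip with 3,000 frames keeps roughly one frame per second and splits the sampled indices into chunks of at most MAX_NUM_FRAMES:

    fps, n_frames, max_frames = 30, 3000, 32

    stride = round(fps)                           # 30 -> keep ~1 frame per second
    frame_idx = list(range(0, n_frames, stride))  # 100 indices: 0, 30, 60, ...
    chunks = [frame_idx[i:i + max_frames] for i in range(0, len(frame_idx), max_frames)]
    print(len(chunks), [len(c) for c in chunks])  # 4 [32, 32, 32, 4]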
148
+ def detect_people_and_machinery(media_path):
149
+ """Detect people and machinery using YOLOv11 for both images and videos"""
150
+ try:
151
+ # Initialize counters with maximum values
152
+ max_people_count = 0
153
+ max_machine_types = {
154
+ "Tower Crane": 0,
155
+ "Mobile Crane": 0,
156
+ "Compactor/Roller": 0,
157
+ "Bulldozer": 0,
158
+ "Excavator": 0,
159
+ "Dump Truck": 0,
160
+ "Concrete Mixer": 0,
161
+ "Loader": 0,
162
+ "Pump Truck": 0,
163
+ "Pile Driver": 0,
164
+ "Grader": 0,
165
+ "Other Vehicle": 0
166
+ }
167
+
168
+ # Check if input is video
169
+ if isinstance(media_path, str) and is_video(media_path):
170
+ cap = cv2.VideoCapture(media_path)
171
+ fps = cap.get(cv2.CAP_PROP_FPS)
172
+ sample_rate = max(1, int(fps)) # Sample 1 frame per second
173
+ frame_count = 0 # Initialize frame counter
174
+
175
+ while cap.isOpened():
176
+ ret, frame = cap.read()
177
+ if not ret:
178
+ break
179
+
180
+ # Process every nth frame based on sample rate
181
+ if frame_count % sample_rate == 0:
182
+ results = YOLO_MODEL(frame)
183
+ people, _, machine_types = process_yolo_results(results)
184
+
185
+ # Update maximum counts
186
+ max_people_count = max(max_people_count, people)
187
+ for k, v in machine_types.items():
188
+ max_machine_types[k] = max(max_machine_types[k], v)
189
+
190
+ frame_count += 1
191
+
192
+ cap.release()
193
+
194
+ else:
195
+ # Handle single image
196
+ if isinstance(media_path, str):
197
+ img = cv2.imread(media_path)
198
+ else:
199
+ # Handle PIL Image
200
+ img = cv2.cvtColor(np.array(media_path), cv2.COLOR_RGB2BGR)
201
+
202
+ results = YOLO_MODEL(img)
203
+ max_people_count, _, max_machine_types = process_yolo_results(results)
204
+
205
+ # Filter out machinery types with zero count
206
+ max_machine_types = {k: v for k, v in max_machine_types.items() if v > 0}
207
+ total_machinery_count = sum(max_machine_types.values())
208
+
209
+ return max_people_count, total_machinery_count, max_machine_types
210
+
211
+ except Exception as e:
212
+ print(f"Error in YOLO detection: {str(e)}")
213
+ return 0, 0, {}
214
+
215
+ def process_yolo_results(results):
216
+ """Process YOLO detection results and count people and machinery"""
217
+ people_count = 0
218
+ machine_types = {
219
+ "Tower Crane": 0,
220
+ "Mobile Crane": 0,
221
+ "Compactor/Roller": 0,
222
+ "Bulldozer": 0,
223
+ "Excavator": 0,
224
+ "Dump Truck": 0,
225
+ "Concrete Mixer": 0,
226
+ "Loader": 0,
227
+ "Pump Truck": 0,
228
+ "Pile Driver": 0,
229
+ "Grader": 0,
230
+ "Other Vehicle": 0
231
+ }
232
+
233
+ # Process detection results
234
+ for r in results:
235
+ boxes = r.boxes
236
+ for box in boxes:
237
+ cls = int(box.cls[0])
238
+ conf = float(box.conf[0])
239
+ class_name = YOLO_MODEL.names[cls]
240
+
241
+ # Count people (Worker class)
242
+ if class_name.lower() == 'worker' and conf > 0.5:
243
+ people_count += 1
244
+
245
+ # Map YOLO classes to machinery types
246
+ machinery_mapping = {
247
+ 'tower_crane': "Tower Crane",
248
+ 'mobile_crane': "Mobile Crane",
249
+ 'compactor': "Compactor/Roller",
250
+ 'roller': "Compactor/Roller",
251
+ 'bulldozer': "Bulldozer",
252
+ 'dozer': "Bulldozer",
253
+ 'excavator': "Excavator",
254
+ 'dump_truck': "Dump Truck",
255
+ 'truck': "Dump Truck",
256
+ 'concrete_mixer_truck': "Concrete Mixer",
257
+ 'loader': "Loader",
258
+ 'pump_truck': "Pump Truck",
259
+ 'pile_driver': "Pile Driver",
260
+ 'grader': "Grader",
261
+ 'other_vehicle': "Other Vehicle"
262
+ }
263
+
264
+ # Count machinery
265
+ if conf > 0.5:
266
+ class_lower = class_name.lower()
267
+ for key, value in machinery_mapping.items():
268
+ if key in class_lower:
269
+ machine_types[value] += 1
270
+ break
271
+
272
+ total_machinery = sum(machine_types.values())
273
+ return people_count, total_machinery, machine_types
274
+
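One design note: machinery_mapping is rebuilt for every detected box. A small refactor sketch (not part of the commit) hoists it to module level and wraps the lookup in a helper, so the per-box loop only does the substring match:

    MACHINERY_MAPPING = {
        'tower_crane': "Tower Crane", 'mobile_crane': "Mobile Crane",
        'compactor': "Compactor/Roller", 'roller': "Compactor/Roller",
        'bulldozer': "Bulldozer", 'dozer': "Bulldozer", 'excavator': "Excavator",
        'dump_truck': "Dump Truck", 'truck': "Dump Truck",
        'concrete_mixer_truck': "Concrete Mixer", 'loader': "Loader",
        'pump_truck': "Pump Truck", 'pile_driver': "Pile Driver",
        'grader': "Grader", 'other_vehicle': "Other Vehicle",
    }

    def map_class_to_machinery(class_name):
        """Return the machinery bucket for a YOLO class name, or None if it is not machinery."""
        class_lower = class_name.lower()
        for key, bucket in MACHINERY_MAPPING.items():
            if key in class_lower:
                return bucket
        return None

process_yolo_results could then call map_class_to_machinery(class_name) once per box and increment the matching bucket.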
275
+ def analyze_video_activities(video_path):
276
+ """Analyze video using mPLUG model with chunking"""
277
+ try:
278
+ all_responses = []
279
+ chunk_generator = encode_video_in_chunks(video_path)
280
+
281
+ for chunk_idx, video_frames in chunk_generator:
282
+ # Load fresh model instance for each chunk
283
+ model, tokenizer, processor = load_model_and_tokenizer()
284
+
285
+ # Process the chunk
286
+ prompt = "Analyze this construction site video chunk and describe the activities happening. Focus on construction activities, machinery usage, and worker actions."
287
+ response = process_video_chunk(video_frames, model, tokenizer, processor, prompt)
288
+ all_responses.append(f"Time period {chunk_idx + 1}:\n{response}")
289
+
290
+ # Clean up GPU memory
291
+ del model, tokenizer, processor
292
+ torch.cuda.empty_cache()
293
+ gc.collect()
294
+
295
+ # Combine all responses
296
+ return "\n\n".join(all_responses)
297
+ except Exception as e:
298
+ print(f"Error analyzing video: {str(e)}")
299
+ return "Error analyzing video activities"
300
+
301
+ def process_image(image_path, model, tokenizer, processor, prompt):
302
+ """Process single image with mPLUG model"""
303
+ try:
304
+ image = Image.open(image_path)
305
+ messages = [{
306
+ "role": "user",
307
+ "content": prompt,
308
+ "images": [image]
309
+ }]
310
+
311
+ model_messages = []
312
+ images = []
313
+
314
+ for msg in messages:
315
+ content_str = msg["content"]
316
+ if "images" in msg and msg["images"]:
317
+ content_str += "<|image|>"
318
+ images.extend(msg["images"])
319
+ model_messages.append({
320
+ "role": msg["role"],
321
+ "content": content_str
322
+ })
323
+
324
+ model_messages.append({
325
+ "role": "assistant",
326
+ "content": ""
327
+ })
328
+
329
+ inputs = processor(
330
+ model_messages,
331
+ images=images,
332
+ videos=None
333
+ )
334
+ # Use the DEVICE variable for transferring inputs
335
+ inputs.to(DEVICE)
336
+ inputs.update({
337
+ 'tokenizer': tokenizer,
338
+ 'max_new_tokens': 100,
339
+ 'decode_text': True,
340
+ })
341
+
342
+ response = model.generate(**inputs)
343
+ return response[0]
344
+ except Exception as e:
345
+ print(f"Error processing image: {str(e)}")
346
+ return "Error processing image"
347
+
348
+ def analyze_image_activities(image_path):
349
+ """Analyze image using mPLUG model"""
350
+ try:
351
+ model, tokenizer, processor = load_model_and_tokenizer()
352
+ prompt = "Analyze this construction site image and describe the activities happening. Focus on construction activities, machinery usage, and worker actions."
353
+ response = process_image(image_path, model, tokenizer, processor, prompt)
354
+
355
+ del model, tokenizer, processor
356
+ if DEVICE == "cuda":
357
+ torch.cuda.empty_cache()
358
+ gc.collect()
359
+
360
+ return response
361
+ except Exception as e:
362
+ print(f"Error analyzing image: {str(e)}")
363
+ return "Error analyzing image activities"
364
+
365
+ # ------------------------------------------------------------------
366
+ # NEW: Function to annotate each frame with bounding boxes & counts
367
+ # ------------------------------------------------------------------
368
+ def annotate_video_with_bboxes(video_path):
369
+ """
370
+ Reads the entire video frame-by-frame, runs YOLO, draws bounding boxes,
371
+ writes a per-frame summary of detected classes on the frame, and saves
372
+ as a new annotated video. Returns: annotated_video_path
373
+ """
374
+ cap = cv2.VideoCapture(video_path)
375
+ fps = cap.get(cv2.CAP_PROP_FPS)
376
+ w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
377
+ h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
378
+
379
+ # Create a temp file for output
380
+ out_file = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
381
+ annotated_video_path = out_file.name
382
+ out_file.close()
383
+
384
+ fourcc = cv2.VideoWriter_fourcc(*'mp4v')
385
+ writer = cv2.VideoWriter(annotated_video_path, fourcc, fps, (w, h))
386
+
387
+ while True:
388
+ ret, frame = cap.read()
389
+ if not ret:
390
+ break
391
+
392
+ results = YOLO_MODEL(frame)
393
+
394
+ # Dictionary to hold per-frame counts of each class
395
+ frame_counts = {}
396
+
397
+ for r in results:
398
+ boxes = r.boxes
399
+ for box in boxes:
400
+ cls_id = int(box.cls[0])
401
+ conf = float(box.conf[0])
402
+ if conf < 0.5:
403
+ continue # Skip low-confidence
404
+
405
+ x1, y1, x2, y2 = box.xyxy[0]
406
+ class_name = YOLO_MODEL.names[cls_id]
407
+
408
+ # Convert to int
409
+ x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
410
+
411
+ # Draw bounding box
412
+ color = (0, 255, 0)
413
+ cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
414
+
415
+ label_text = f"{class_name} {conf:.2f}"
416
+ cv2.putText(frame, label_text, (x1, y1 - 6),
417
+ cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255,255,255), 1)
418
+
419
+ # Increment per-frame class count
420
+ frame_counts[class_name] = frame_counts.get(class_name, 0) + 1
421
+
422
+ # Build a summary line, e.g. "Worker: 2, Excavator: 1, ..."
423
+ summary_str = ", ".join(f"{cls_name}: {count}"
424
+ for cls_name, count in frame_counts.items())
425
+
426
+ # Put the summary text in the top-left
427
+ cv2.putText(
428
+ frame,
429
+ summary_str,
430
+ (15, 30), # position
431
+ cv2.FONT_HERSHEY_SIMPLEX,
432
+ 1.0,
433
+ (255, 255, 0),
434
+ 2
435
+ )
436
+
437
+ writer.write(frame)
438
+
439
+ cap.release()
440
+ writer.release()
441
+ return annotated_video_path
442
+
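Ultralytics results also expose a plot() helper that returns a BGR frame with boxes, labels and confidences already drawn, which could replace the manual rectangle/putText box drawing above; a brief sketch assuming the module-level YOLO_MODEL from this file:

    import cv2

    def annotate_video_with_plot(video_path, annotated_video_path, conf=0.5):
        # Sketch: same read/annotate/write loop, but letting Ultralytics draw the boxes
        cap = cv2.VideoCapture(video_path)
        fps = cap.get(cv2.CAP_PROP_FPS) or 25.0  # guard against a 0 fps report
        w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        writer = cv2.VideoWriter(annotated_video_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            results = YOLO_MODEL(frame, conf=conf, verbose=False)
            writer.write(results[0].plot())  # annotated BGR frame
        cap.release()
        writer.release()

The per-frame summary line would still be added with cv2.putText as in the original loop.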
443
+ # ----------------------------------------------------------------------------
444
+ # Update process_diary function to also return an annotated video if it's video
445
+ # ----------------------------------------------------------------------------
446
+ def process_diary(day, date, total_people, total_machinery, machinery_types, activities, media):
447
+ """Process the site diary entry"""
448
+ if media is None:
449
+ # Return 6 text outputs as before + None for video
450
+ return [day, date, "No media uploaded", "No media uploaded", "No media uploaded", "No media uploaded", None]
451
+
452
+ try:
453
+ if not hasattr(media, 'name'):
454
+ raise ValueError("Invalid file upload")
455
+
456
+ file_ext = get_file_extension(media.name)
457
+ if not (is_image(media.name) or is_video(media.name)):
458
+ raise ValueError(f"Unsupported file type: {file_ext}")
459
+
460
+ with tempfile.NamedTemporaryFile(suffix=file_ext, delete=False) as temp_file:
461
+ temp_path = temp_file.name
462
+ if hasattr(media, 'name') and os.path.exists(media.name):
463
+ with open(media.name, 'rb') as f:
464
+ temp_file.write(f.read())
465
+ else:
466
+ file_content = media.read() if hasattr(media, 'read') else media
467
+ temp_file.write(file_content if isinstance(file_content, bytes) else file_content.read())
468
+
469
+ detected_people, detected_machinery, detected_machinery_types = detect_people_and_machinery(temp_path)
470
+
471
+ # Default: no annotated video
472
+ annotated_video_path = None
473
+
474
+ if is_image(media.name):
475
+ # If it's an image, do normal image analysis
476
+ detected_activities = analyze_image_activities(temp_path)
477
+ else:
478
+ # If it's a video, do video analysis & also annotate the video
479
+ detected_activities = analyze_video_activities(temp_path)
480
+ annotated_video_path = annotate_video_with_bboxes(temp_path)
481
+
482
+ if os.path.exists(temp_path):
483
+ os.remove(temp_path)
484
+
485
+ detected_types_str = ", ".join([f"{k}: {v}" for k, v in detected_machinery_types.items()])
486
+ # Return 7 outputs (the first 6 as before, plus the annotated video path)
487
+ return [day, date, str(detected_people), str(detected_machinery), detected_types_str, detected_activities, annotated_video_path]
488
+
489
+ except Exception as e:
490
+ print(f"Error processing media: {str(e)}")
491
+ return [day, date, "Error processing media", "Error processing media", "Error processing media", "Error processing media", None]
492
+
493
+ # Create the Gradio interface
494
+ import gradio as gr
495
+ with gr.Blocks(title="Digital Site Diary") as demo:
496
+ gr.Markdown("# 📝 Digital Site Diary")
497
+
498
+ with gr.Row():
499
+ # User Input Column
500
+ with gr.Column():
501
+ gr.Markdown("### User Input")
502
+ day = gr.Textbox(label="Day", value='9')
503
+ date = gr.Textbox(label="Date", placeholder="YYYY-MM-DD", value=datetime.now().strftime("%Y-%m-%d"))
504
+ total_people = gr.Number(label="Total Number of People", precision=0, value=10)
505
+ total_machinery = gr.Number(label="Total Number of Machinery", precision=0, value=3)
506
+ machinery_types = gr.Textbox(
507
+ label="Number of Machinery Per Type",
508
+ placeholder="e.g., Excavator: 2, Roller: 1",
509
+ value="Excavator: 2, Roller: 1"
510
+ )
511
+ activities = gr.Textbox(
512
+ label="Activity",
513
+ placeholder="e.g., 9 AM: Excavation, 10 AM: Concreting",
514
+ value="9 AM: Excavation, 10 AM: Concreting",
515
+ lines=3
516
+ )
517
+ media = gr.File(label="Upload Image/Video", file_types=["image", "video"])
518
+ submit_btn = gr.Button("Submit", variant="primary")
519
+
520
+ # Model Detection Column
521
+ with gr.Column():
522
+ gr.Markdown("### Model Detection")
523
+ model_day = gr.Textbox(label="Day")
524
+ model_date = gr.Textbox(label="Date")
525
+ model_people = gr.Textbox(label="Total Number of People")
526
+ model_machinery = gr.Textbox(label="Total Number of Machinery")
527
+ model_machinery_types = gr.Textbox(label="Number of Machinery Per Type")
528
+ model_activities = gr.Textbox(label="Activity", lines=5)
529
+ # NEW: annotated video output
530
+ model_annotated_video = gr.Video(label="Annotated Video")
531
+
532
+ # Connect the submit button to the processing function
533
+ submit_btn.click(
534
+ fn=process_diary,
535
+ inputs=[day, date, total_people, total_machinery, machinery_types, activities, media],
536
+ outputs=[
537
+ model_day,
538
+ model_date,
539
+ model_people,
540
+ model_machinery,
541
+ model_machinery_types,
542
+ model_activities,
543
+ model_annotated_video # The new 7th output
544
+ ]
545
+ )
546
+
547
+ if __name__ == "__main__":
548
+ demo.launch(share=False)
549
+ def is_image(filename):
+ return get_file_extension(filename) in IMAGE_EXTENSIONS
550
+
551
+ def is_video(filename):
552
+ return get_file_extension(filename) in VIDEO_EXTENSIONS
553
+
554
+ # Model configuration
555
+ MODEL_NAME = 'iic/mPLUG-Owl3-7B-240728'
556
+ MODEL_CACHE_DIR = os.getenv('TRANSFORMERS_CACHE', './models')
557
+
558
+ # Create cache directory if it doesn't exist
559
+ os.makedirs(MODEL_CACHE_DIR, exist_ok=True)
560
+
561
+ # Download and cache the model
562
+ try:
563
+ model_path = snapshot_download(MODEL_NAME, cache_dir=MODEL_CACHE_DIR)
564
+ except Exception as e:
565
+ print(f"Error downloading model: {str(e)}")
566
+ model_path = os.path.join(MODEL_CACHE_DIR, MODEL_NAME)
567
+
568
+ MAX_NUM_FRAMES = 32
569
+
570
  def load_model_and_tokenizer():
571
  """Load a fresh instance of the model and tokenizer"""
572
  try: