Staticaliza committed on
Commit
e6f4055
·
verified ·
1 Parent(s): 842b1c3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -1
app.py CHANGED
@@ -87,6 +87,28 @@ def generate(input, instruction=DEFAULT_INPUT, sampling=False, temperature=0.7,
87
  elif filetype == "GIF":
88
  frames = encode_gif(input)
89
  content.extend(frames)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
  elif filetype == "Video":
91
  frames = encode_video(input)
92
  content.extend(frames)
@@ -97,7 +119,8 @@ def generate(input, instruction=DEFAULT_INPUT, sampling=False, temperature=0.7,
97
  content.append(audio)
98
  else:
99
  return "Unsupported file type."
100
-
 
101
  filename = os.path.basename(input)
102
  prefix = input_prefixes[filetype].replace("█", filename)
103
  content.append(prefix + instruction)
 
87
  elif filetype == "GIF":
88
  frames = encode_gif(input)
89
  content.extend(frames)
90
+ elif filetype == "Video":
91
+ vr = VideoReader(input, ctx=cpu(0))
92
+ fps = round(vr.get_avg_fps())
93
+ raw_idxs = list(range(0, len(vr), fps))
94
+ idxs = raw_idxs if len(raw_idxs) <= MAX_FRAMES else uniform_sample(raw_idxs, MAX_FRAMES)
95
+ frames_np = vr.get_batch(idxs).asnumpy()
96
+ audio_np, sample_rate = librosa.load(input, sr=16000, mono=True)
97
+ for idx, frame_np in zip(idxs, frames_np):
98
+ image = Image.fromarray(frame_np.astype("uint8")).convert("RGB")
99
+ content.append(image)
100
+ sec = idx // fps
101
+ start = sec * sample_rate
102
+ end = start + sample_rate
103
+ chunk_np = audio_np[start:end]
104
+ chunk_tensor = torch.from_numpy(chunk_np).float().to(DEVICE)
105
+ content.append({"array": chunk_tensor, "sampling_rate": sample_rate})
106
+ elif filetype == "Audio":
107
+ audio_np, sample_rate = librosa.load(input, sr=16000, mono=True)
108
+ chunk_tensor = torch.from_numpy(audio_np).float().to(DEVICE)
109
+ content.append({"array": chunk_tensor, "sampling_rate": sample_rate})
110
+
111
+ """
112
  elif filetype == "Video":
113
  frames = encode_video(input)
114
  content.extend(frames)
 
119
  content.append(audio)
120
  else:
121
  return "Unsupported file type."
122
+ """
123
+
124
  filename = os.path.basename(input)
125
  prefix = input_prefixes[filetype].replace("█", filename)
126
  content.append(prefix + instruction)