Spaces:

Staticaliza
/

Sense

Running

App Files Community

Staticaliza committed on May 28

Commit

4036c77

verified ·

1 Parent(s): f4f7208

Update app.py

Browse files

Files changed (1) hide show

app.py +9 -11

app.py CHANGED Viewed

@@ -35,7 +35,7 @@ footer {
 }
 '''
-global_instruction = "You will analyze video, audio and text input and output your description of the given content with as much keywords and always take a guess."
 input_prefixes = {
     "Image": "Analyze the '█' image.",
@@ -52,14 +52,12 @@ filetypes = {
 }
 # Functions
-def uniform_sample(seq, n):
-    step = max(len(seq) // n, 1)
-    return seq[::step][:n]
 def build_video(path):
     vr = VideoReader(path, ctx = cpu(0))
-    idx = uniform_sample(range(len(vr)), MAX_FRAMES)
-    batch = vr.get_batch(idx).asnumpy()
     frames = [Image.fromarray(frame.astype("uint8")) for frame in batch]
     audio = build_audio(path)
@@ -83,10 +81,10 @@ def build_image(path):
     return image
 def build_gif(path):
-    image = Image.open(path)
-    frames = [f.copy().convert("RGB") for f in ImageSequence.Iterator(image)]
-    frames = uniform_sample(frames, MAX_FRAMES)
-    return frames
 def build_audio(path):
     audio, _ = librosa.load(path, sr=AUDIO_SR, mono=True)
@@ -142,7 +140,7 @@ with gr.Blocks(css=css) as main:
     with gr.Column():
         input = gr.File(label="Input", file_types=["image", "video", "audio"], type="filepath")
         instruction = gr.Textbox(lines=1, value=DEFAULT_INPUT, label="Instruction")
-        sampling = gr.Checkbox(value=True, label="Sampling")
         temperature = gr.Slider(minimum=0, maximum=2, step=0.01, value=1, label="Temperature")
         top_p = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.95, label="Top P")
         top_k = gr.Slider(minimum=0, maximum=1000, step=1, value=50, label="Top K")

 }
 '''
+global_instruction = "You will analyze image, GIF, video, and audio input, then use as much keywords to describe the given content and take as much guesses of what it could be."
 input_prefixes = {
     "Image": "Analyze the '█' image.",
 }
 # Functions
+uniform_sample=lambda seq, n: seq[::max(len(seq) // n,1)][:n]
 def build_video(path):
     vr = VideoReader(path, ctx = cpu(0))
+    i = uniform_sample(range(len(vr)), MAX_FRAMES)
+    batch = vr.get_batch(i).asnumpy()
     frames = [Image.fromarray(frame.astype("uint8")) for frame in batch]
     audio = build_audio(path)
     return image
 def build_gif(path):
+    vr = VideoReader(path, ctx=cpu(0))
+    i = uniform_sample(range(len(vr)), MAX_FRAMES)
+    frames = vr.get_batch(i).asnumpy()
+    return [Image.fromarray(f.astype("uint8")) for f in frames]
 def build_audio(path):
     audio, _ = librosa.load(path, sr=AUDIO_SR, mono=True)
     with gr.Column():
         input = gr.File(label="Input", file_types=["image", "video", "audio"], type="filepath")
         instruction = gr.Textbox(lines=1, value=DEFAULT_INPUT, label="Instruction")
+        sampling = gr.Checkbox(value=False, label="Sampling")
         temperature = gr.Slider(minimum=0, maximum=2, step=0.01, value=1, label="Temperature")
         top_p = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.95, label="Top P")
         top_k = gr.Slider(minimum=0, maximum=1000, step=1, value=50, label="Top K")