Spaces:

akshit-g
/

SeeForMe-Life

Sleeping

App Files Files Community

akshit-g committed on Dec 3, 2024

Commit

97974d9

1 Parent(s): e0b4ef2

update : CPU

Browse files

Files changed (4) hide show

README.md +7 -5
app.py +117 -0
control_vectors.pt +3 -0
requirements.txt +4 -0

README.md CHANGED Viewed

@@ -1,12 +1,14 @@
 ---
-title: SeeForMe LifeCrisis
-emoji: 🌍
-colorFrom: gray
-colorTo: red
 sdk: gradio
-sdk_version: 5.7.1
 app_file: app.py
 pinned: false
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: SeeForMe-LifeCrisis
+emoji: 🌜
+colorFrom: indigo
+colorTo: blue
 sdk: gradio
+sdk_version: 4.19.2
 app_file: app.py
 pinned: false
+short_description: When you are questioning life and the meaning of life
+license: apache-2.0
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py ADDED Viewed

	@@ -0,0 +1,117 @@

+import spaces
+import torch
+import re
+import gradio as gr
+from threading import Thread
+from transformers import TextIteratorStreamer, AutoTokenizer, AutoModelForCausalLM
+from PIL import ImageDraw
+from torchvision.transforms.v2 import Resize
+import subprocess
+# subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
+# Pinned checkpoint: repository id and revision tag handed to from_pretrained below.
+model_id = "vikhyatk/moondream2"
+revision = "2024-05-20"
+tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)
+# Load the model on CPU in float32. The commented-out arguments inside the call
+# are the bfloat16/CUDA and flash-attention settings this configuration replaces.
+# NOTE(review): trust_remote_code=True runs code shipped with the model repo;
+# the pinned revision keeps that code fixed.
+moondream = AutoModelForCausalLM.from_pretrained(
+    model_id, trust_remote_code=True, revision=revision,
+    # torch_dtype=torch.bfloat16, device_map={"": "cuda"}
+    torch_dtype=torch.float32, device_map="cpu"
+    # attn_implementation="flash_attention_2"
+)
+# Put the module in evaluation mode before serving requests.
+moondream.eval()
+# NOTE(review): torch.load unpickles the file; only load control_vectors.pt from a trusted source.
+# Presumably one steering tensor per text-transformer layer (they are zipped with
+# the layers further down) — TODO confirm against how the file was produced.
+control_vectors = torch.load("control_vectors.pt", map_location="cpu")
+# Convert every vector to CPU float32, the same dtype/device the model is loaded with.
+control_vectors = [t.to('cpu', dtype=torch.float32) for t in control_vectors]
+class LayerWrapper(torch.nn.Module):
+    def __init__(self, og_layer, control_vectors, scale=4.2):
+        super().__init__()
+        self.og_layer = og_layer
+        self.control_vectors = control_vectors
+        self.scale = scale
+    def forward(self, *args, **kwargs):
+        layer_outputs = self.og_layer(*args, **kwargs)
+        layer_outputs = (layer_outputs[0] + self.scale * self.control_vectors, *layer_outputs[1:])
+        return layer_outputs
+moondream.text_model.transformer.h = torch.nn.ModuleList([
+    LayerWrapper(layer, vector, 4.2)
+    for layer, vector in zip(moondream.text_model.transformer.h, control_vectors)
+])
+@spaces.GPU(duration=10)
+def answer_question(img, prompt):
+    image_embeds = moondream.encode_image(img)
+    streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True)
+    thread = Thread(
+        target=moondream.answer_question,
+        kwargs={
+            "image_embeds": image_embeds,
+            "question": prompt,
+            "tokenizer": tokenizer,
+            "streamer": streamer,
+            "repetition_penalty": 1.2,
+            "temperature": 0.1,
+            "do_sample": True,
+            "length_penalty": 1.2
+        },
+    )
+    thread.start()
+    buffer = ""
+    for new_text in streamer:
+        buffer += new_text
+        yield buffer.strip()
+def extract_floats(text):
+    # Regular expression to match an array of four floating point numbers
+    pattern = r"\[\s*(-?\d+\.\d+)\s*,\s*(-?\d+\.\d+)\s*,\s*(-?\d+\.\d+)\s*,\s*(-?\d+\.\d+)\s*\]"
+    match = re.search(pattern, text)
+    if match:
+        # Extract the numbers and convert them to floats
+        return [float(num) for num in match.groups()]
+    return None  # Return None if no match is found
+def extract_bbox(text):
+    bbox = None
+    if extract_floats(text) is not None:
+        x1, y1, x2, y2 = extract_floats(text)
+        bbox = (x1, y1, x2, y2)
+    return bbox
+def process_answer(img, answer):
+    if extract_bbox(answer) is not None:
+        x1, y1, x2, y2 = extract_bbox(answer)
+        draw_image = Resize(768)(img)
+        width, height = draw_image.size
+        x1, x2 = int(x1 * width), int(x2 * width)
+        y1, y2 = int(y1 * height), int(y2 * height)
+        bbox = (x1, y1, x2, y2)
+        ImageDraw.Draw(draw_image).rectangle(bbox, outline="red", width=3)
+        return gr.update(visible=True, value=draw_image)
+    return gr.update(visible=False, value=None)
+# Build the Gradio UI: a heading, a prompt row with a submit button, then the image
+# input next to the streamed answer and the (initially hidden) annotated image.
+with gr.Blocks() as demo:
+    gr.Markdown(
+        """
+        # 🌜 Just for Fun to discuss the meaning of life using [activation vectors]
+        """
+    )
+    with gr.Row():
+        prompt = gr.Textbox(label="Input", value="Describe this image.", scale=4)
+        submit = gr.Button("Submit")
+    with gr.Row():
+        img = gr.Image(type="pil", label="Upload an Image")
+        with gr.Column():
+            output = gr.Markdown(label="Response")
+            # Stays hidden until process_answer returns an update that makes it visible.
+            ann = gr.Image(visible=False, label="Annotated Image")
+    # The button click and the textbox submit event both run the same streaming handler.
+    submit.click(answer_question, [img, prompt], output)
+    prompt.submit(answer_question, [img, prompt], output)
+    # Every change to the answer text re-runs bounding-box extraction and refreshes
+    # the annotated image; the progress indicator is suppressed for this step.
+    output.change(process_answer, [img, output], ann, show_progress=False)
+# Enable the request queue, then start the app.
+demo.queue().launch()

control_vectors.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9e233c0a671e74f0927ae189a9932f7d7236a347b07ab114bec7ca333c121d92
+size 105518

requirements.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+timm==0.9.12
+transformers==4.36.2
+einops==0.7.0
+accelerate==0.25.0