Spaces:

Quazim0t0
/

FastVLMBoxes

Running

File size: 1,778 Bytes

1a9c884

/**
 * Background, highlight and text colours used by the glass-effect surfaces.
 * All backgrounds are translucent `rgba(...)` values; the text colour is opaque white.
 */
const GLASS_COLORS = {
  DEFAULT_BG: "rgba(0, 0, 0, 0.25)",
  SUCCESS_BG: "rgba(0, 50, 0, 0.25)",
  ERROR_BG: "rgba(50, 0, 0, 0.25)",
  BUTTON_BG: "rgba(59, 130, 246, 0.25)",
  HIGHLIGHT: "rgba(255, 255, 255, 0.15)",
  TEXT: "#ffffff",
} as const;

/**
 * Tuning values and colour palette for the glass-style visual effect.
 *
 * NOTE(review): BASE_FREQUENCY / NUM_OCTAVES / SCALE look like inputs to an
 * SVG turbulence + displacement filter — confirm against the consuming component.
 */
export const GLASS_EFFECTS = {
  BASE_FREQUENCY: 0.008,
  NUM_OCTAVES: 2,
  SCALE: 77,
  COLORS: GLASS_COLORS,
} as const;

/** Outer spacing values. NOTE(review): presumably CSS pixels — confirm with callers. */
const LAYOUT_MARGINS = {
  DEFAULT: 20,
  BOTTOM: 20,
} as const;

/** Fixed sizes for the prompt and caption elements. NOTE(review): presumably CSS pixels — confirm. */
const LAYOUT_DIMENSIONS = {
  PROMPT_WIDTH: 420,
  CAPTION_WIDTH: 150,
  CAPTION_HEIGHT: 45,
} as const;

/** Animation durations. NOTE(review): presumably milliseconds — confirm with callers. */
const LAYOUT_TRANSITIONS = {
  SCALE_DURATION: 200,
  OPACITY_DURATION: 200,
  TRANSFORM_DURATION: 400,
} as const;

/**
 * Layout constants grouped by concern: margins, element dimensions and
 * transition durations.
 */
export const LAYOUT = {
  MARGINS: LAYOUT_MARGINS,
  DIMENSIONS: LAYOUT_DIMENSIONS,
  TRANSITIONS: LAYOUT_TRANSITIONS,
} as const;

/**
 * Delay and interval constants.
 *
 * NOTE(review): the units are not stated in this file; the values are
 * presumably milliseconds for timer APIs — confirm against the call sites.
 */
export const TIMING = {
  /** Delay associated with capturing a frame. */
  FRAME_CAPTURE_DELAY: 50,
  /** Interval associated with video recovery. */
  VIDEO_RECOVERY_INTERVAL: 1_000,
  /** Debounce window for resize handling. */
  RESIZE_DEBOUNCE: 50,
  /** Delay associated with prompt suggestions. */
  SUGGESTION_DELAY: 50,
} as const;

/** Prompt text shared by the default value, the placeholder and the first suggestion. */
const DEFAULT_PROMPT = "Describe what you see in one sentence.";

/** Ready-made prompt suggestions; the first entry is the default prompt. */
const PROMPT_SUGGESTIONS = [
  DEFAULT_PROMPT,
  "What is the color of my shirt?",
  "Identify any text or written content visible.",
  "What emotions or actions are being portrayed?",
  "Name the object I am holding in my hand.",
] as const;

/**
 * User-facing prompt strings: the default/placeholder prompt, the list of
 * suggested prompts, and the status messages for the caption area.
 */
export const PROMPTS = {
  default: DEFAULT_PROMPT,
  placeholder: DEFAULT_PROMPT,

  suggestions: PROMPT_SUGGESTIONS,

  fallbackCaption: "Waiting for first caption...",
  processingMessage: "Starting analysis...",
} as const;

/**
 * Instruction prompt asking the model to act as an object detector and reply
 * with nothing but a JSON array wrapped in a fenced `json` markdown block.
 * Each array element is described as an object with a string `label` and a
 * `bbox_2d` array of four integers `[x_min, y_min, x_max, y_max]`.
 *
 * The example fence at the end of the prompt uses real line breaks (`\n`).
 * A double-escaped `\\n` would put a literal backslash + "n" into the text,
 * which a model can copy verbatim into its answer.
 */
export const FASTVLM_BOXING_PROMPT =
  "You are an object detection assistant. Analyze the image and respond with a JSON array of detected objects. Each object should have a 'label' (string) and 'bbox_2d' (an array of 4 integers: [x_min, y_min, x_max, y_max]). Only include objects relevant to the user's prompt. Do not include any other text besides the JSON. The JSON should be wrapped in a markdown code block like so: ```json\n[...json_array...]\n```";