metek7's picture
Update app.py
3c47ae7 verified
raw
history blame
3.09 kB
import gradio as gr
import torch
from llava.model.builder import load_pretrained_model
from llava.mm_utils import get_model_name_from_path, process_images, tokenizer_image_token
from llava.constants import IMAGE_TOKEN_INDEX, DEFAULT_IMAGE_TOKEN
from llava.conversation import conv_templates
import copy
from decord import VideoReader, cpu
import numpy as np
# Markdown heading rendered at the top of the page (user-facing, Turkish).
title = "# 📸 Instagram Reels Analiz Aracı"

# Two-line Markdown blurb rendered under the heading (user-facing, Turkish).
# Written as adjacent literals with an explicit newline between the sentences.
description = (
    "Bu araç, yüklenen Instagram Reels videolarını analiz eder ve içeriği özetler.\n"
    "Video hakkında genel bir açıklama yapar ve klipte neler olup bittiğini adım adım anlatır."
)
def load_video(video_path, max_frames_num=64, fps=1):
    """Decode a sampled subset of frames from the video at *video_path*.

    Frames are first picked at roughly ``fps`` frames per second of video,
    using the container's average frame rate to derive the index step. If
    that yields more than ``max_frames_num`` indices, the selection is
    replaced by ``max_frames_num`` indices spread evenly over the whole
    video.

    Args:
        video_path: Path handed to ``VideoReader``.
        max_frames_num: Upper bound on the number of frames returned.
        fps: Target sampling rate, in frames per second of video.

    Returns:
        A ``(video_frames, frame_count)`` tuple: the NumPy array produced by
        ``get_batch(...).asnumpy()`` and the number of sampled frame indices.

    Raises:
        ValueError: If the reader reports zero frames.
    """
    vr = VideoReader(video_path, ctx=cpu(0))
    total_frame_num = len(vr)
    if total_frame_num == 0:
        raise ValueError(f"Video contains no frames: {video_path!r}")

    # int() truncates toward zero, so a video whose average FPS is lower than
    # the requested rate would give a step of 0 and make range() raise.
    # Clamp to 1 so such videos are sampled at every frame instead.
    step = max(1, int(vr.get_avg_fps() / fps))
    frame_idx = list(range(0, total_frame_num, step))

    if len(frame_idx) > max_frames_num:
        # Too many frames at the requested rate: resample uniformly instead.
        frame_idx = np.linspace(
            0, total_frame_num - 1, max_frames_num, dtype=int
        ).tolist()

    video_frames = vr.get_batch(frame_idx).asnumpy()
    return video_frames, len(frame_idx)
# Model loading: executed once at import time; the resulting objects are
# module-level globals used by the request handler.
pretrained = "lmms-lab/LLaVA-Video-7B-Qwen2"
model_name = "llava_qwen"

# Device that input tensors are moved to before generation.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

print("Model yükleniyor...")
# The second positional argument is left as None.
# NOTE(review): presumably the "base model" slot of the builder — confirm
# against the load_pretrained_model signature.
(tokenizer, model, image_processor, max_length) = load_pretrained_model(
    pretrained,
    None,
    model_name,
    torch_dtype="bfloat16",
    device_map="auto",
)
model.eval()
print("Model başarıyla yüklendi!")
def analyze_reel(video_path):
    """Run the loaded model on the video at *video_path* and return its answer.

    The video is sampled with ``load_video``, preprocessed with the module's
    ``image_processor``, and sent to ``model.generate`` together with a
    Turkish instruction that asks for a summary followed by a step-by-step
    description. The first decoded sequence is returned, stripped of
    surrounding whitespace.
    """
    sampled_frames, frame_count = load_video(video_path)

    # Preprocess the frames, then move them to the target device as bfloat16.
    pixel_values = image_processor.preprocess(
        sampled_frames, return_tensors="pt"
    )["pixel_values"]
    video = pixel_values.to(device).bfloat16()

    question = f"{DEFAULT_IMAGE_TOKEN}Bu Instagram Reels videosunu analiz et. Önce videonun genel içeriğini özetle, ardından klipte neler olup bittiğini adım adım açıkla. Video {frame_count} kareye bölünmüştür."

    # Work on a private copy of the template so the shared one is not mutated.
    conversation = copy.deepcopy(conv_templates["qwen_1_5"])
    conversation.append_message(conversation.roles[0], question)
    # The second role gets no message: its turn is left open for the model.
    conversation.append_message(conversation.roles[1], None)
    full_prompt = conversation.get_prompt()

    token_ids = tokenizer_image_token(
        full_prompt, tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt"
    )
    input_ids = token_ids.unsqueeze(0).to(device)

    generation_options = {
        "images": [video],
        "modalities": ["video"],
        "do_sample": False,
        "temperature": 0,
        "max_new_tokens": 1024,
    }
    with torch.no_grad():
        generated = model.generate(input_ids, **generation_options)

    decoded = tokenizer.batch_decode(generated, skip_special_tokens=True)
    return decoded[0].strip()
def gradio_interface(video_file):
    """Gradio callback: analyze *video_file*, or ask for an upload if it is None.

    Returns the text from ``analyze_reel`` when a file is given; otherwise
    returns a fixed Turkish message asking the user to upload a video.
    """
    return (
        "Lütfen bir video dosyası yükleyin."
        if video_file is None
        else analyze_reel(video_file)
    )
# Build the page: heading and blurb, a row with the video input and the result
# box, and a button that runs the analysis callback.
# NOTE(review): the original indentation was lost; placing both widgets inside
# the Row and the button below it is a reconstruction — confirm the layout.
with gr.Blocks() as demo:
    gr.Markdown(title)
    gr.Markdown(description)

    with gr.Row():
        video_input = gr.Video(label="Instagram Reels Videosu")
        output = gr.Textbox(label="Analiz Sonucu", lines=10)

    analyze_button = gr.Button("Reels'i Analiz Et")
    analyze_button.click(
        fn=gradio_interface,
        inputs=video_input,
        outputs=output,
    )

# Start the app only when this file is run as a script; share=True is passed
# through to launch() unchanged.
if __name__ == "__main__":
    demo.launch(share=True)