DrishtiSharma committed
Commit 5c32165 · verified · 1 Parent(s): 4c89624

Create app.py

Files changed (1):
  app.py  +87  -0
app.py ADDED
@@ -0,0 +1,87 @@
import streamlit as st
from yt_dlp import YoutubeDL
from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
from qwen_vl_utils import process_vision_info
import torch
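
# NOTE (added comment): dependencies are not pinned in this commit; the
# imports assume roughly: streamlit, yt-dlp, torch, transformers,
# accelerate (for device_map="auto"), qwen-vl-utils, and flash-attn
# (for the attention implementation selected below).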

# Title and Description
st.title("Video Analysis with Qwen2-VL")
st.markdown("""
This app downloads a YouTube video, processes it, and analyzes it using the Qwen2-VL model.
""")

# User input for YouTube URL
url = st.text_input("Enter YouTube Video URL:", value="https://www.youtube.com/watch?v=MCWJNOfJoSM")

if st.button("Analyze Video"):
    with st.spinner("Downloading video..."):
        ydl_opts = {
            "format": "best",
            "outtmpl": "football.mp4"
        }
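        # NOTE (added comment): "best" selects the best single pre-merged
        # stream; with a fixed outtmpl, a rerun reuses an existing
        # football.mp4 instead of downloading it again.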
        try:
            with YoutubeDL(ydl_opts) as ydl:
                ydl.download([url])
            st.success("Video downloaded successfully!")
        except Exception as e:
            st.error(f"Error downloading video: {e}")
            st.stop()

    with st.spinner("Loading model..."):
        MODEL_NAME = "Qwen/Qwen2-VL-7B-Instruct"

        try:
            model = Qwen2VLForConditionalGeneration.from_pretrained(
                MODEL_NAME,
                torch_dtype=torch.bfloat16,
                device_map="auto",
                attn_implementation="flash_attention_2",
            )
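            # NOTE (added comment): attn_implementation="flash_attention_2"
            # assumes the optional flash-attn package and a CUDA GPU; dropping
            # the argument falls back to the default attention implementation.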
            processor = AutoProcessor.from_pretrained(MODEL_NAME)
            st.success("Model loaded successfully!")
        except Exception as e:
            st.error(f"Error loading model: {e}")
            st.stop()

    # Process video and generate response
    messages = [
        {
            "role": "user",
            "content": [
                {
                    "type": "video",
                    "video": "football.mp4",
                    "max_pixels": 1280 * 780,
                    "fps": 0.1,
                },
                {"type": "text", "text": "What's happening in the video? Who wins the penalty shootout?"},
            ],
        }
    ]
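
    # NOTE (added comment): with fps=0.1, process_vision_info samples roughly
    # one frame every 10 seconds, and max_pixels caps per-frame resolution;
    # both settings trade visual detail for memory and speed.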

    text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    image_inputs, video_inputs = process_vision_info(messages)

    inputs = processor(
        text=[text],
        images=image_inputs,
        videos=video_inputs,
        padding=True,
        return_tensors="pt",
    )
    # Move inputs to wherever device_map="auto" placed the model,
    # rather than hard-coding "cuda".
    inputs = inputs.to(model.device)

    with st.spinner("Generating response..."):
        try:
            generated_ids = model.generate(**inputs, max_new_tokens=512)
            generated_ids_trimmed = [
                out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
            ]
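            # NOTE (added comment): generate() returns prompt + completion
            # ids; slicing off len(in_ids) keeps only the newly generated
            # tokens before decoding.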

            output_text = processor.batch_decode(
                generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
            )
            st.success("Response generated!")
            st.text_area("Model Output:", value=output_text[0], height=200)
        except Exception as e:
            st.error(f"Error generating response: {e}")
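
# NOTE (added sketch, not in the original commit): the 7B model reloads on
# every "Analyze Video" click; a minimal Streamlit-idiomatic alternative,
# assuming the same model name, would cache it across reruns:
#
#   @st.cache_resource
#   def load_model(name="Qwen/Qwen2-VL-7B-Instruct"):
#       model = Qwen2VLForConditionalGeneration.from_pretrained(
#           name, torch_dtype=torch.bfloat16, device_map="auto"
#       )
#       processor = AutoProcessor.from_pretrained(name)
#       return model, processor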