BahadirGLCK committed
Commit 226eb77 · Parent(s): 753ac1a

Adapt to the original YouTube link format.

Files changed (2)
  1. app.py +24 -21
  2. requirements.txt +2 -1
app.py CHANGED
@@ -9,14 +9,16 @@ import torch
 from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
 from qwen_vl_utils import process_vision_info
 import gradio as gr
+from pytube import YouTube  # new import for handling YouTube links
 
 # ----------------------------------------
-# 1. Initialize the Qwen 2.5 VL Model (7B)
+# 1. Initialize the Qwen 2.5 VL Model (7B) for CPU-only
 # ----------------------------------------
-# We load the official 7B version, using flash attention optimization and bfloat16 for efficiency.
 model_path = "Qwen/Qwen2.5-VL-7B-Instruct"
 model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
-    model_path
+    model_path,
+    torch_dtype=torch.float16  # half precision to keep the CPU-only load small
+    # Removed attn_implementation and device_map for CPU-only deployment
 )
 processor = AutoProcessor.from_pretrained(model_path)
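This hunk drops attn_implementation and device_map and loads the model in float16 for a CPU-only Space. float16 support on CPU is limited for some operations, so a more defensive loader could pick precision and placement from the available hardware. A minimal sketch, not part of the commit; the bfloat16/device_map="auto" GPU branch and the float32 CPU fallback are assumptions:

    import torch
    from transformers import Qwen2_5_VLForConditionalGeneration

    # Sketch only: choose precision/placement from the hardware that is actually present.
    if torch.cuda.is_available():
        model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
            "Qwen/Qwen2.5-VL-7B-Instruct",
            torch_dtype=torch.bfloat16,   # assumption: bf16 on GPU, as in the pre-commit setup
            device_map="auto",
        )
    else:
        model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
            "Qwen/Qwen2.5-VL-7B-Instruct",
            torch_dtype=torch.float32,    # assumption: float32 is the safer CPU default
        )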
 
@@ -26,7 +28,7 @@ processor = AutoProcessor.from_pretrained(model_path)
 
 def download_video(url, dest_path):
     """
-    Download the video from the given URL and save it to a destination path.
+    Download a non-YouTube video using requests.
     """
     response = requests.get(url, stream=True)
     with open(dest_path, 'wb') as f:
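The hunk ends right after the output file is opened, so the streamed write loop is not visible here. A typical completion with requests looks like the sketch below; the chunk size and the raise_for_status() call are assumptions rather than lines from the commit:

    import requests

    def download_video(url, dest_path):
        """Download a non-YouTube video using requests (sketch of the truncated body)."""
        response = requests.get(url, stream=True)
        response.raise_for_status()  # assumption: fail fast on HTTP errors
        with open(dest_path, 'wb') as f:
            for chunk in response.iter_content(chunk_size=8192):  # assumed chunk size
                if chunk:
                    f.write(chunk)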
@@ -37,16 +39,25 @@ def download_video(url, dest_path):
 def get_video_frames(video_path, num_frames=64, cache_dir='.cache'):
     """
     Download (if needed) and extract frames and timestamps from the video.
-    - Uses caching to avoid repeated processing.
-    - Utilizes decord to read video frames.
+    If the URL is a YouTube link, use pytube to download the video.
+    Uses caching to avoid repeated processing.
     """
     os.makedirs(cache_dir, exist_ok=True)
     video_hash = hashlib.md5(video_path.encode('utf-8')).hexdigest()
-
-    # If the video is a URL, download it locally
-    if video_path.startswith('http://') or video_path.startswith('https://'):
+
+    # Check if the video is a YouTube link
+    if video_path.startswith('http') and ("youtube.com" in video_path or "youtu.be" in video_path):
         video_file_path = os.path.join(cache_dir, f'{video_hash}.mp4')
         if not os.path.exists(video_file_path):
+            print("Downloading YouTube video using pytube...")
+            yt = YouTube(video_path)
+            stream = yt.streams.filter(progressive=True, file_extension='mp4').order_by('resolution').desc().first()
+            stream.download(output_path=cache_dir, filename=f'{video_hash}.mp4')
+    # Otherwise, if it's a direct link to an mp4 or a local path
+    elif video_path.startswith('http'):
+        video_file_path = os.path.join(cache_dir, f'{video_hash}.mp4')
+        if not os.path.exists(video_file_path):
+            print("Downloading video using requests...")
             download_video(video_path, video_file_path)
     else:
         video_file_path = video_path
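The new branch grabs the highest-resolution progressive MP4 via pytube and saves it under the cached hash name. Note that streams.filter(...).first() returns None when no progressive MP4 stream exists; a guarded variant of the same download (the helper name and error message are assumptions, not part of the commit) could look like:

    from pytube import YouTube

    def download_youtube_video(url, output_dir, filename):
        """Sketch: best progressive MP4 via pytube, with a guard for a missing stream."""
        yt = YouTube(url)
        stream = (
            yt.streams.filter(progressive=True, file_extension='mp4')
            .order_by('resolution')
            .desc()
            .first()
        )
        if stream is None:  # assumption: raise a clear error instead of failing on NoneType
            raise RuntimeError(f"No progressive MP4 stream available for {url}")
        return stream.download(output_path=output_dir, filename=filename)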
@@ -87,9 +98,7 @@ def inference(video_path, prompt, max_new_tokens=2048, total_pixels=20480 * 28 *
             {"video": video_path, "total_pixels": total_pixels, "min_pixels": min_pixels},
         ]},
     ]
-    # Prepare the text with the chat template from the processor.
     text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-    # Process the video information into the proper inputs.
     image_inputs, video_inputs, video_kwargs = process_vision_info([messages], return_video_kwargs=True)
     fps_inputs = video_kwargs['fps']
     inputs = processor(
@@ -100,11 +109,8 @@ def inference(video_path, prompt, max_new_tokens=2048, total_pixels=20480 * 28 *
         padding=True,
         return_tensors="pt"
     )
-    #inputs = inputs.to('cuda')
-
-    # Generate the response using the model
+    # Do not move to GPU on CPU-only environment
     output_ids = model.generate(**inputs, max_new_tokens=max_new_tokens)
-    # Post-process the output tokens to text.
     generated_ids = [output_ids[len(input_ids):] for input_ids, output_ids in zip(inputs.input_ids, output_ids)]
     output_text = processor.batch_decode(generated_ids, skip_special_tokens=True, clean_up_tokenization_spaces=True)
     return output_text[0]
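With the inputs.to('cuda') line gone, inference() now runs end to end on CPU. A hedged usage sketch mirroring how process_video drives it below; the URL and prompt are placeholders, not values from the repository:

    # Placeholder URL and prompt, for illustration only.
    local_path, frames, timestamps = get_video_frames("https://youtu.be/VIDEO_ID", num_frames=64)
    chapters = inference(local_path, "Split this video into chapters with timestamps and short titles.")
    print(chapters)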
@@ -123,11 +129,9 @@ sample_prompts = [
 # -------------------------------------------------
 def process_video(video_url, custom_prompt, sample_prompt):
     """
-    This function is called when a user clicks the 'Process Video' button.
-    - It uses the custom prompt if provided; otherwise, it falls back to the selected sample prompt.
-    - It then downloads and processes the video and calls the inference function.
+    Called when the user clicks 'Process Video'. Uses the custom prompt if provided; otherwise uses the sample prompt.
+    Downloads and processes the video, then runs inference.
     """
-    # Choose the prompt: use custom prompt if not empty, else use the sample
     final_prompt = custom_prompt.strip() if custom_prompt.strip() != "" else sample_prompt
     try:
         video_path, frames, timestamps = get_video_frames(video_url, num_frames=64)
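The hunk stops inside the try block, so the call into inference() and the error handling are not shown. Based on the functions above, the remainder plausibly continues as in this sketch; everything after get_video_frames, including the error message returned to the UI, is an assumption:

    def process_video(video_url, custom_prompt, sample_prompt):
        """Sketch of the full flow; the body after get_video_frames is assumed, not shown in this hunk."""
        final_prompt = custom_prompt.strip() if custom_prompt.strip() != "" else sample_prompt
        try:
            video_path, frames, timestamps = get_video_frames(video_url, num_frames=64)
            return inference(video_path, final_prompt)
        except Exception as e:  # assumption: report errors in the output textbox rather than raising
            return f"Error while processing the video: {e}"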
@@ -145,7 +149,7 @@ def process_video(video_url, custom_prompt, sample_prompt):
 # 6. Build the Gradio Interface
 # -------------------------------------------------
 with gr.Blocks() as demo:
-    gr.Markdown("# YouTube Video Chapter Splitter using Qwen 2.5 VL (7B)")
+    gr.Markdown("# YouTube Video Chapter Splitter using Qwen 2.5 VL (7B) on CPU")
     gr.Markdown("Enter a YouTube video URL and either type a custom prompt or select one of the sample prompts. Then click **Process Video** to generate the chapter breakdown.")
 
     with gr.Row():
@@ -158,7 +162,6 @@ with gr.Blocks() as demo:
     output_text = gr.Textbox(label="Output", lines=10)
     run_button = gr.Button("Process Video")
 
-    # When the button is clicked, run the process_video function.
     run_button.click(fn=process_video, inputs=[video_url_input, custom_prompt_input, sample_prompt_input], outputs=output_text)
 
 # -------------------------------------------------
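The diff ends before the script's entry point, so the launch call is outside the changed region. A typical Gradio entry point for a Space of this kind, assumed rather than shown in the commit:

    if __name__ == "__main__":
        # Assumed entry point; the actual launch arguments are not visible in this diff.
        demo.launch()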
 
requirements.txt CHANGED
@@ -16,4 +16,5 @@ av
 decord
 numpy
 Pillow
-requests
+requests
+pytube