Spaces:

Arxived
/

qwen

Sleeping

App Files Files Community

qwen / app.py

DrishtiSharma

Update app.py

8d5517e verified 6 months ago

raw

history blame contribute delete

3.63 kB

	import streamlit as st
	from yt_dlp import YoutubeDL
	from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
	from qwen_vl_utils import process_vision_info
	import torch
	import subprocess
	import sys

	def install_dependencies():
	    """Install the app's pip dependencies in a fixed order.

	    Runs ``pip install`` through the current interpreter three times:
	    pinned torch, then pinned flash-attn, then everything listed in
	    ``requirements.txt``. If any invocation exits non-zero, an error is
	    printed and the process terminates with exit status 1.
	    """
	    pip_install = [sys.executable, "-m", "pip", "install"]
	    # Order matters: flash-attn is installed only after torch, and the
	    # remaining requirements come last.
	    install_targets = (
	        ["torch==2.0.1"],
	        ["flash-attn==2.7.2.post1"],
	        ["-r", "requirements.txt"],
	    )
	    try:
	        for target in install_targets:
	            subprocess.check_call(pip_install + target)
	    except subprocess.CalledProcessError as e:
	        print(f"Error occurred while installing dependencies: {e}")
	        sys.exit(1)

	# Install dependencies as a module-level side effect. This runs every time
	# the script is executed, and only after the imports at the top of the file
	# (streamlit, yt_dlp, transformers, qwen_vl_utils, torch) have already been
	# evaluated, so it cannot supply packages those imports need.
	# NOTE(review): Streamlit presumably re-executes this script on each user
	# interaction, which would repeat the pip installs -- confirm and consider
	# guarding this call.
	install_dependencies()


	# Page header: the app title followed by a short Markdown description of
	# what the app does.
	st.title("Video Analysis with Qwen2-VL")
	st.markdown("""
	This app downloads a YouTube video, processes it, and analyzes it using the Qwen2-VL model.
	""")

	# Text box for the video URL, pre-filled with a default value. The result
	# is stored in `url` and passed to the downloader when the
	# "Analyze Video" button is pressed.
	url = st.text_input("Enter YouTube Video URL:", value="https://www.youtube.com/watch?v=MCWJNOfJoSM")

	# Button handler: download the video, load Qwen2-VL, build the multimodal
	# prompt, generate an answer and display it. Each stage reports its own
	# error via st.error and halts the run with st.stop().
	if st.button("Analyze Video"):
	    # Single source of truth for the local file shared by the download
	    # step and the model prompt.
	    video_path = "football.mp4"

	    # Reject empty / whitespace-only input before touching the network.
	    video_url = url.strip()
	    if not video_url:
	        st.error("Please enter a YouTube video URL.")
	        st.stop()

	    with st.spinner("Downloading video..."):
	        ydl_opts = {
	            "format": "best",
	            "outtmpl": video_path,
	            # The output name is fixed, so without this yt-dlp would keep a
	            # file left by an earlier run and the model would analyze the
	            # previous video instead of the requested one.
	            "overwrites": True,
	        }
	        try:
	            with YoutubeDL(ydl_opts) as ydl:
	                ydl.download([video_url])
	            st.success("Video downloaded successfully!")
	        except Exception as e:
	            st.error(f"Error downloading video: {e}")
	            st.stop()

	    with st.spinner("Loading model..."):
	        MODEL_NAME = "Qwen/Qwen2-VL-7B-Instruct"

	        try:
	            model = Qwen2VLForConditionalGeneration.from_pretrained(
	                MODEL_NAME,
	                torch_dtype=torch.bfloat16,
	                device_map="auto",
	                attn_implementation="flash_attention_2",
	            )
	            processor = AutoProcessor.from_pretrained(MODEL_NAME)
	            st.success("Model loaded successfully!")
	        except Exception as e:
	            st.error(f"Error loading model: {e}")
	            st.stop()

	    # Chat-style prompt: one user turn containing the video plus a question.
	    messages = [
	        {
	            "role": "user",
	            "content": [
	                {
	                    "type": "video",
	                    "video": video_path,
	                    "max_pixels": 1280 * 780,
	                    "fps": 0.1,
	                },
	                {"type": "text", "text": "What's happening in the video? Who wins the penalty shootout?"},
	            ],
	        }
	    ]

	    with st.spinner("Processing video..."):
	        # Decoding the video and building tensors can fail (unreadable
	        # file, out of memory), so it gets the same error handling as the
	        # other stages instead of surfacing a raw traceback.
	        try:
	            text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
	            image_inputs, video_inputs = process_vision_info(messages)

	            inputs = processor(
	                text=[text],
	                images=image_inputs,
	                videos=video_inputs,
	                padding=True,
	                return_tensors="pt",
	            )
	            # Follow the device chosen by device_map="auto" rather than
	            # assuming the model ended up on "cuda".
	            inputs = inputs.to(model.device)
	        except Exception as e:
	            st.error(f"Error processing video: {e}")
	            st.stop()

	    with st.spinner("Generating response..."):
	        try:
	            generated_ids = model.generate(**inputs, max_new_tokens=512)
	            # Drop the prompt tokens so only newly generated text is decoded.
	            generated_ids_trimmed = [
	                out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
	            ]

	            output_text = processor.batch_decode(
	                generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
	            )
	            st.success("Response generated!")
	            st.text_area("Model Output:", value=output_text[0], height=200)
	        except Exception as e:
	            st.error(f"Error generating response: {e}")