Lemorra committed on
Commit
16dca74
·
1 Parent(s): ec7fb79

🎉 Added support for Qwen2_5 model

Dockerfile ADDED
@@ -0,0 +1,16 @@
+ # Read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
+ # you will also find guides on how best to write your Dockerfile
+
+ FROM python:3.9
+
+ RUN useradd -m -u 1000 user
+ USER user
+ ENV PATH="/home/user/.local/bin:$PATH"
+
+ WORKDIR /app
+
+ COPY --chown=user ./requirements.txt requirements.txt
+ RUN pip install --no-cache-dir --upgrade -r requirements.txt
+
+ COPY --chown=user . /app
+ CMD ["uvicorn", "src.app:app", "--host", "0.0.0.0", "--port", "7860"]
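
This is the standard Hugging Face Docker-Space template; locally the image would be built with "docker build -t qwen-api ." and run with "docker run -p 7860:7860 --env-file .env qwen-api" (image name hypothetical). Without Docker, the same entrypoint can be started programmatically; a minimal sketch, assuming it runs from the repository root with MODEL_PATH and SECRET_KEY set in .env:

# run_local.py -- hypothetical helper, not part of this commit
import uvicorn

if __name__ == "__main__":
    # Mirrors the container CMD: serve the FastAPI app from src/app.py on port 7860
    uvicorn.run("src.app:app", host="0.0.0.0", port=7860)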
requirements.txt ADDED
@@ -0,0 +1,9 @@
+ fastapi
+ uvicorn[standard]
+ git+https://github.com/huggingface/transformers
+ accelerate
+ qwen-vl-utils[decord]==0.0.8
+ python-dotenv
+ PyJWT
+ pydantic
+ torch
src/__pycache__/app.cpython-310.pyc ADDED
Binary file (1.19 kB).
src/app.py ADDED
@@ -0,0 +1,31 @@
+ from typing import Annotated
+ from fastapi import FastAPI, Depends
+ from .utils.authentication import verify_token
+ from .utils.payload_model import SingleInferencePayload, VideoInferencePayload
+ from .utils.qwen_inference import Qwen2_5
+
+ import os
+ from dotenv import load_dotenv
+
+ load_dotenv()
+
+ model_path = os.getenv("MODEL_PATH")
+
+ model_object = Qwen2_5(model_path)
+
+ app = FastAPI()
+
+ @app.get("/")
+ def greet_json():
+     return {
+         "message": "Welcome! The backend API for Qwen2.5-VL-3B-Instruct model is running.",
+         "status": "active"
+     }
+
+ @app.post("/single_inference")
+ def single_inference(payload: SingleInferencePayload, _token: Annotated[dict, Depends(verify_token)]):
+     return model_object.get_single_inference(payload)
+
+ @app.post("/video_inference")
+ def video_inference(payload: VideoInferencePayload, _token: Annotated[dict, Depends(verify_token)]):
+     return model_object.get_video_inference(payload)
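
Calling these routes requires a Bearer token signed with the server's SECRET_KEY and a JSON body matching the payload models. A minimal client sketch, assuming the requests package (not pinned in requirements.txt) and a token minted as in the PyJWT example under authentication.py below:

# client_example.py -- hypothetical, not part of this commit
import base64
import requests  # assumption: not listed in requirements.txt

token = "<JWT signed with SECRET_KEY>"  # see the minting sketch under authentication.py
with open("example.jpg", "rb") as f:    # hypothetical local test image
    image_b64 = base64.b64encode(f.read()).decode()

resp = requests.post(
    "http://localhost:7860/single_inference",
    headers={"Authorization": f"Bearer {token}"},
    json={"image_path": image_b64, "question": "Describe this image."},
)
print(resp.json())  # {"message": [...], "status": 200} on success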
src/utils/__init__.py ADDED
File without changes
src/utils/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (236 Bytes).
src/utils/__pycache__/authentication.cpython-310.pyc ADDED
Binary file (1.02 kB).
src/utils/__pycache__/payload_model.cpython-310.pyc ADDED
Binary file (728 Bytes).
src/utils/__pycache__/qwen_inference.cpython-310.pyc ADDED
Binary file (712 Bytes).
src/utils/authentication.py ADDED
@@ -0,0 +1,20 @@
+ from fastapi import HTTPException, Header
+ import jwt
+ from dotenv import load_dotenv
+ import os
+
+ load_dotenv()
+
+ def get_secret_key():
+     return os.getenv("SECRET_KEY")
+
+ async def verify_token(authorization: str = Header(...)):
+     try:
+         token_type, token = authorization.split()
+         if token_type.lower() != "bearer":
+             raise HTTPException(status_code=401, detail="Invalid token type")
+         return jwt.decode(token, get_secret_key(), algorithms=["HS256"])
+     except jwt.ExpiredSignatureError:
+         raise HTTPException(status_code=401, detail="Token has expired")
+     except (jwt.InvalidTokenError, ValueError):
+         raise HTTPException(status_code=401, detail="Invalid token")
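
verify_token accepts any HS256 token signed with SECRET_KEY, so a client can mint one with PyJWT. A sketch, assuming SECRET_KEY is shared via the same .env:

# mint_token.py -- hypothetical helper, not part of this commit
import datetime
import os

import jwt
from dotenv import load_dotenv

load_dotenv()

# Algorithm and key must match what verify_token() passes to jwt.decode
token = jwt.encode(
    {"sub": "api-client", "exp": datetime.datetime.now(datetime.timezone.utc) + datetime.timedelta(hours=1)},
    os.getenv("SECRET_KEY"),
    algorithm="HS256",
)
print(token)  # send as: Authorization: Bearer <token>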
src/utils/payload_model.py ADDED
@@ -0,0 +1,9 @@
+ from pydantic import BaseModel
+
+ class SingleInferencePayload(BaseModel):
+     image_path: str
+     question: str
+
+ class VideoInferencePayload(BaseModel):
+     video_path: list[str]
+     question: str
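
For reference, request bodies that validate against these models look like the following (illustrative values; note that both *_path fields actually carry base64-encoded image data rather than filesystem paths, and a video is sent as a list of frames):

# Hypothetical request bodies for the two endpoints
single_payload = {
    "image_path": "<base64-encoded image>",
    "question": "What is shown in the image?",
}
video_payload = {
    "video_path": ["<base64 frame 1>", "<base64 frame 2>"],
    "question": "What happens across these frames?",
}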
src/utils/qwen_inference.py ADDED
@@ -0,0 +1,121 @@
+ from .payload_model import SingleInferencePayload, VideoInferencePayload
+ from transformers import Qwen2_5_VLForConditionalGeneration, AutoTokenizer, AutoProcessor
+ from qwen_vl_utils import process_vision_info
+ import torch
+
+
+ class Qwen2_5:
+     def __init__(self, model_path: str):
+         self.model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
+             model_path, torch_dtype="auto", device_map="auto"
+         )
+         self.tokenizer = AutoTokenizer.from_pretrained(model_path)
+         self.processor = AutoProcessor.from_pretrained(model_path)
+
+     def prepare_single_inference(self, image: str, question: str):
+         image = f"data:image;base64,{image}"
+         messages = [
+             {
+                 "role": "user",
+                 "content": [
+                     {
+                         "type": "image",
+                         "image": image,
+                     },
+                     {
+                         "type": "text",
+                         "text": question
+                     },
+                 ],
+             }
+         ]
+         text = self.processor.apply_chat_template(
+             messages, tokenize=False, add_generation_prompt=True
+         )
+         image_inputs, video_inputs = process_vision_info(messages)
+         inputs = self.processor(
+             text=[text],
+             images=image_inputs,
+             videos=video_inputs,
+             padding=True,
+             return_tensors="pt",
+         )
+         inputs = inputs.to(self.model.device)
+
+         return inputs
+
+     def prepare_video_inference(self, video: list[str], question: str):
+         base64_videos = []
+         for frame in video:
+             base64_videos.append(f"data:image;base64,{frame}")
+         messages = [
+             {
+                 "role": "user",
+                 "content": [
+                     {
+                         "type": "video",
+                         "video": base64_videos,
+                     },
+                     {
+                         "type": "text",
+                         "text": question
+                     },
+                 ],
+             }
+         ]
+         text = self.processor.apply_chat_template(
+             messages, tokenize=False, add_generation_prompt=True
+         )
+         image_inputs, video_inputs, video_kwargs = process_vision_info(messages, return_video_kwargs=True)
+         inputs = self.processor(
+             text=[text],
+             images=image_inputs,
+             videos=video_inputs,
+             padding=True,
+             return_tensors="pt",
+             **video_kwargs,
+         )
+         inputs = inputs.to(self.model.device)
+         return inputs
+
+     def get_single_inference(self, payload: SingleInferencePayload):
+         try:
+             processed_inputs = self.prepare_single_inference(payload.image_path, payload.question)
+             generated_ids = self.model.generate(**processed_inputs, max_new_tokens=128)
+             generated_ids_trimmed = [
+                 out_ids[len(in_ids):] for in_ids, out_ids in zip(processed_inputs.input_ids, generated_ids)
+             ]
+             output_text = self.processor.batch_decode(
+                 generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
+             )
+             print(f"Model generated text: {output_text}")
+             return {
+                 "message": output_text,
+                 "status": 200
+             }
+         except Exception as e:
+             return {
+                 "message": str(e),
+                 "status": 500
+             }
+
+     def get_video_inference(self, payload: VideoInferencePayload):
+         try:
+             processed_inputs = self.prepare_video_inference(payload.video_path, payload.question)
+             generated_ids = self.model.generate(**processed_inputs, max_new_tokens=128)
+             generated_ids_trimmed = [
+                 out_ids[len(in_ids):] for in_ids, out_ids in zip(processed_inputs.input_ids, generated_ids)
+             ]
+             output_text = self.processor.batch_decode(
+                 generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
+             )
+             print(f"Model generated text: {output_text}")
+             return {
+                 "message": output_text,
+                 "status": 200
+             }
+         except Exception as e:
+             return {
+                 "message": str(e),
+                 "status": 500
+             }
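
The class can also be smoke-tested outside FastAPI. A sketch, assuming a run from the repository root, the Qwen/Qwen2.5-VL-3B-Instruct checkpoint as MODEL_PATH, and a GPU with enough memory to hold it:

# smoke_test.py -- hypothetical, not part of this commit
import base64

from src.utils.payload_model import SingleInferencePayload
from src.utils.qwen_inference import Qwen2_5

model = Qwen2_5("Qwen/Qwen2.5-VL-3B-Instruct")  # assumed MODEL_PATH value
with open("example.jpg", "rb") as f:            # hypothetical test image
    image_b64 = base64.b64encode(f.read()).decode()

result = model.get_single_inference(
    SingleInferencePayload(image_path=image_b64, question="Describe this image.")
)
print(result)  # {"message": ["..."], "status": 200} on success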