Faisal committed on
Commit
61663e8
·
1 Parent(s): 470d42b

Add requirements.txt and .gitignore for Hugging Face deployment

Browse files
Files changed (3) hide show
  1. .gitignore +44 -0
  2. app.py +157 -0
  3. requirements.txt +12 -0
.gitignore ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Virtual Environment
2
+ .venv/
3
+ venv/
4
+ env/
5
+
6
+ # Python
7
+ __pycache__/
8
+ *.py[cod]
9
+ *$py.class
10
+ *.so
11
+ .Python
12
+ build/
13
+ develop-eggs/
14
+ dist/
15
+ downloads/
16
+ eggs/
17
+ .eggs/
18
+ lib/
19
+ lib64/
20
+ parts/
21
+ sdist/
22
+ var/
23
+ wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+
28
+ # IDE
29
+ .vscode/
30
+ .idea/
31
+ *.swp
32
+ *.swo
33
+
34
+ # OS
35
+ .DS_Store
36
+ Thumbs.db
37
+
38
+ # Logs
39
+ *.log
40
+
41
+ # Model files (if any)
42
+ *.bin
43
+ *.safetensors
44
+ models/
app.py ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os

import gradio as gr
from transformers import Qwen2VLForConditionalGeneration, AutoProcessor, GenerationConfig
from qwen_vl_utils import process_vision_info
import torch
import requests
# NOTE(review): Markdown is not referenced anywhere in the visible code, and
# IPython is not listed in requirements.txt - confirm this import is needed.
from IPython.display import Markdown
7
+
8
+ # ----------------------------
9
+ # MODEL LOADING (MedVLM-R1)
10
+ # ----------------------------
11
# Hugging Face Hub repository that provides both the weights and the processor.
MODEL_PATH = 'JZPeterPan/MedVLM-R1'

# Keyword arguments for loading the weights: bfloat16 precision, with device
# placement delegated to the library through device_map="auto".
_MODEL_LOAD_KWARGS = {
    "torch_dtype": torch.bfloat16,
    "device_map": "auto",
}

model = Qwen2VLForConditionalGeneration.from_pretrained(MODEL_PATH, **_MODEL_LOAD_KWARGS)
processor = AutoProcessor.from_pretrained(MODEL_PATH)

# Greedy decoding (do_sample=False) of a single sequence, capped at 1024 new
# tokens.
# NOTE(review): 151643 is presumably the tokenizer's padding/end-of-text id
# for this model family - confirm against the tokenizer config.
_GENERATION_SETTINGS = {
    "max_new_tokens": 1024,
    "do_sample": False,
    "temperature": 1,
    "num_return_sequences": 1,
    "pad_token_id": 151643,
}
temp_generation_config = GenerationConfig(**_GENERATION_SETTINGS)
27
+
28
+ # ----------------------------
29
+ # API SETTINGS (DeepSeek R1)
30
+ # ----------------------------
31
# The OpenRouter credential is read from the environment (for example a
# Hugging Face Space secret named OPENROUTER_API_KEY) so that it never lives
# in the repository. An empty string means "not configured".
# SECURITY: the key that used to be hard-coded on this line is exposed in the
# public commit history and must be revoked and replaced.
api_key = os.environ.get("OPENROUTER_API_KEY", "")
deepseek_model = "deepseek/deepseek-r1"
33
+
34
+ # ----------------------------
35
+ # DEFAULT QUESTION
36
+ # ----------------------------
37
# Multiple-choice question appended to whatever the user types.
DEFAULT_QUESTION = (
    "What abnormality is in the brain MRI and what is the location?\n"
    "A) Tumour\n"
    "B) No tumour"
)

# Prompt wrapper for the vision model; "{Question}" is filled in with
# str.format. The empty first and last entries reproduce the newline that
# opens and closes the template text.
QUESTION_TEMPLATE = "\n".join([
    "",
    "{Question}",
    "Your task:",
    "1. Think through the question step by step, enclose your reasoning process in <think>...</think> tags.",
    "2. Then provide the correct single-letter choice (A, B, C, D,...) inside <answer>...</answer> tags.",
    "3. No extra information or text outside of these tags.",
    "",
])
46
+
47
+ # ----------------------------
48
+ # PIPELINE FUNCTION
49
+ # ----------------------------
50
# Upper bound, in seconds, on the OpenRouter request so that a stalled
# connection cannot block the UI handler indefinitely.
_DEEPSEEK_TIMEOUT_S = 120


def _run_medvlm(image, question):
    """Run the vision-language model on one image/question pair and return its decoded reply."""
    message = [{
        "role": "user",
        "content": [
            {"type": "image", "image": image},
            {"type": "text", "text": QUESTION_TEMPLATE.format(Question=question)},
        ],
    }]

    text = processor.apply_chat_template(message, tokenize=False, add_generation_prompt=True)
    image_inputs, video_inputs = process_vision_info(message)

    # The model is loaded with device_map="auto", so its device is not
    # necessarily CUDA; follow the model instead of hard-coding "cuda".
    inputs = processor(
        text=text,
        images=image_inputs,
        videos=video_inputs,
        padding=True,
        return_tensors="pt",
    ).to(model.device)

    # max_new_tokens and do_sample already live in temp_generation_config,
    # so they are not repeated as keyword arguments here.
    generated_ids = model.generate(
        **inputs,
        use_cache=True,
        generation_config=temp_generation_config,
    )

    # Drop the prompt tokens so that only the newly generated part is decoded.
    generated_ids_trimmed = [
        out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
    ]
    return processor.batch_decode(
        generated_ids_trimmed,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )[0]


def _refine_with_deepseek(medvlm_answer):
    """Ask the DeepSeek model (via OpenRouter) to restructure the raw answer; return its text or an error message."""
    prompt = (
        "The following is a medical AI's answer to a visual question.\n"
        "The answer is about having tumour or not, focus on that mostly.\n"
        "Keep the answer precise but more structured, and helpful for a medical professional.\n"
        "If possible, make a table using the details from the original answer.\n"
        "\n"
        "Original Answer:\n"
        f"{medvlm_answer}\n"
    )
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    data = {
        "model": deepseek_model,
        "messages": [
            {"role": "system", "content": "You are a highly skilled medical writer."},
            {"role": "user", "content": prompt},
        ],
    }

    try:
        response = requests.post(
            "https://openrouter.ai/api/v1/chat/completions",
            headers=headers,
            json=data,
            timeout=_DEEPSEEK_TIMEOUT_S,
        )
    except requests.RequestException as exc:
        # Connection errors and timeouts are reported to the user instead of
        # escaping from the UI handler.
        return f"Error from DeepSeek: {exc}"

    try:
        return response.json()["choices"][0]["message"]["content"]
    except (ValueError, KeyError, IndexError, TypeError) as exc:
        # Non-JSON bodies and error payloads without "choices" end up here.
        return f"Error from DeepSeek: {exc}\nFull Response: {response.text}"


def process_pipeline(image, user_question):
    """Answer a question about an uploaded image and return a structured write-up.

    The user's question is combined with DEFAULT_QUESTION, answered by the
    locally loaded vision-language model, and that raw answer is then sent to
    the DeepSeek model for restructuring.

    Args:
        image: Image reference handed over by the UI (a file path when the
            image widget uses type="filepath"), or None if nothing was uploaded.
        user_question: Free-text question; None, empty, or whitespace-only
            values are treated as missing.

    Returns:
        The restructured answer text, or a human-readable message when input
        is missing, the API key is not configured, or the remote call fails.
    """
    question = (user_question or "").strip()
    if image is None or not question:
        return "Please upload an image and enter a question."

    # Fail before the expensive local inference if the remote step cannot run.
    if not api_key:
        return "Error from DeepSeek: no API key is configured."

    # Combine user's question with default
    combined_question = question + "\n\n" + DEFAULT_QUESTION

    output_text = _run_medvlm(image, combined_question)
    return _refine_with_deepseek(output_text)
129
+
130
+ # ----------------------------
131
+ # GRADIO UI
132
+ # ----------------------------
133
def _reset_form():
    """Return cleared values for the image input, the question box and the answer box."""
    return None, "", ""


# Two-column layout: image upload on the left, question / buttons / answer on
# the right.
with gr.Blocks(title="Brain MRI QA") as demo:
    with gr.Row():
        # Left column
        with gr.Column():
            image_input = gr.Image(type="filepath", label="Upload Medical Image")

        # Right column
        with gr.Column():
            question_box = gr.Textbox(label="Your Question about the Image", placeholder="Type your question here...")
            submit_btn = gr.Button("Submit")
            clear_btn = gr.Button("Clear")
            llm_output = gr.Textbox(label="Detailed LLM Answer", interactive=False, lines=10)

    submit_btn.click(
        fn=process_pipeline,
        inputs=[image_input, question_box],
        outputs=llm_output,
    )
    # "Clear" also resets the uploaded image, so that a stale scan cannot be
    # submitted together with a new question by accident.
    clear_btn.click(
        fn=_reset_form,
        outputs=[image_input, question_box, llm_output],
    )

if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
gradio==5.42.0
# app.py imports Qwen2VLForConditionalGeneration, which older transformers
# releases do not provide; 4.45.0 is the first release that ships Qwen2-VL.
transformers>=4.45.0
torch>=2.0.0
torchvision>=0.15.0
requests>=2.31.0
Pillow>=10.0.0
accelerate>=0.20.0
safetensors>=0.3.0
tokenizers>=0.15.0
numpy>=1.24.0
scipy>=1.10.0
qwen-vl-utils
# app.py has a top-level "from IPython.display import Markdown"
ipython